In [1]:
!pip3 install datasets transformers -q
!pip3 install wandb --upgrade -q

[K     |████████████████████████████████| 342 kB 12.6 MB/s 
[K     |████████████████████████████████| 4.2 MB 52.0 MB/s 
[K     |████████████████████████████████| 212 kB 52.5 MB/s 
[K     |████████████████████████████████| 84 kB 3.5 MB/s 
[K     |████████████████████████████████| 136 kB 56.4 MB/s 
[K     |████████████████████████████████| 1.1 MB 43.9 MB/s 
[K     |████████████████████████████████| 127 kB 56.5 MB/s 
[K     |████████████████████████████████| 6.6 MB 37.2 MB/s 
[K     |████████████████████████████████| 596 kB 54.6 MB/s 
[K     |████████████████████████████████| 144 kB 51.6 MB/s 
[K     |████████████████████████████████| 271 kB 52.9 MB/s 
[K     |████████████████████████████████| 94 kB 3.2 MB/s 
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
datascience 0.10.6 requires folium==0.2.1, but you have folium 0.8.3 which is incompatible.

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from tqdm.notebook import tqdm
from sklearn.preprocessing import MultiLabelBinarizer

from datasets import load_dataset
import random
from sklearn import metrics, model_selection, preprocessing
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset
import transformers
from transformers import AdamW, get_linear_schedule_with_warmup

In [3]:
torch.cuda.empty_cache()

In [4]:
def seed_everything(seed=73):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    # some cudnn methods can be random even after fixing the seed unless you tell it to be deterministic
    torch.backends.cudnn.deterministic = True

seed_everything(1234)

In [5]:
import wandb

wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

**Dataset**

In [6]:
train = load_dataset('eurlex', 'eurlex57k', split='train').to_pandas()
test = load_dataset('eurlex', 'eurlex57k', split='test').to_pandas()
val = load_dataset('eurlex', 'eurlex57k', split='validation').to_pandas()
print(train.shape, test.shape, val.shape)

Downloading builder script:   0%|          | 0.00/1.97k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/955 [00:00<?, ?B/s]

Downloading and preparing dataset eurlex/eurlex57k (download: 47.96 MiB, generated: 201.79 MiB, post-processed: Unknown size, total: 249.75 MiB) to /root/.cache/huggingface/datasets/eurlex/eurlex57k/1.1.0/d2fdeaa4fcb5f41394d2ed0317c8541d7f9be85d2d601b9fa586c8b461bc3a34...


Downloading data:   0%|          | 0.00/50.3M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/45000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/6000 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/6000 [00:00<?, ? examples/s]

Dataset eurlex downloaded and prepared to /root/.cache/huggingface/datasets/eurlex/eurlex57k/1.1.0/d2fdeaa4fcb5f41394d2ed0317c8541d7f9be85d2d601b9fa586c8b461bc3a34. Subsequent calls will reuse this data.


Reusing dataset eurlex (/root/.cache/huggingface/datasets/eurlex/eurlex57k/1.1.0/d2fdeaa4fcb5f41394d2ed0317c8541d7f9be85d2d601b9fa586c8b461bc3a34)
Reusing dataset eurlex (/root/.cache/huggingface/datasets/eurlex/eurlex57k/1.1.0/d2fdeaa4fcb5f41394d2ed0317c8541d7f9be85d2d601b9fa586c8b461bc3a34)


(45000, 4) (6000, 4) (6000, 4)


In [7]:
train['text'] = train['title'] + " " + train['text']
val['text'] = val['title'] + " " + val['text']
test['text'] = test['title'] + " " + test['text']

In [8]:
train.iloc[23452:23459]

Unnamed: 0,celex_id,title,text,eurovoc_concepts
23452,32007D0583,2007/583/EC: Commission Decision of 21 June 20...,2007/583/EC: Commission Decision of 21 June 20...,"[1074, 2563, 5541, 889]"
23453,31985R0220,Commission Regulation (EEC) No 220/85 of 29 Ja...,Commission Regulation (EEC) No 220/85 of 29 Ja...,"[1309, 2519]"
23454,32002R0434,Commission Regulation (EC) No 434/2002 of 8 Ma...,Commission Regulation (EC) No 434/2002 of 8 Ma...,"[1423, 2734, 2803, 3299]"
23455,32008R1107,Commission Regulation (EC) No 1107/2008 of 7 N...,Commission Regulation (EC) No 1107/2008 of 7 N...,"[1631, 2871, 2897, 5726, 764, 893]"
23456,32006R1918,Commission Regulation (EC) No 1918/2006 of 20 ...,Commission Regulation (EC) No 1918/2006 of 20 ...,"[1264, 161, 1644, 2771, 3191, 4578]"
23457,32003R2291,Commission Regulation (EC) No 2291/2003 of 23 ...,Commission Regulation (EC) No 2291/2003 of 23 ...,"[3568, 4170]"
23458,32013R0207,Commission Implementing Regulation (EU) No 207...,Commission Implementing Regulation (EU) No 207...,"[1224, 1406, 2211, 2965, 4117, 4146, 5034, 525..."


In [8]:
from sklearn.preprocessing import MultiLabelBinarizer

def one_hot_encoder(data, classes=None):
    if classes is not None:
        mlb = MultiLabelBinarizer(classes=classes)
    else:
        mlb = MultiLabelBinarizer()
    ohe = pd.DataFrame(mlb.fit_transform(data["eurovoc_concepts"]),columns=mlb.classes_)
    return ohe

In [9]:
class Dataset:
    def __init__(self, texts, labels, tokenizer, max_len):
        self.texts = texts
        self.labels = labels

        self.tokenizer = tokenizer
        self.max_len = max_len
    
    def __len__(self):
        return len(self.texts)

    def __getitem__(self, index):
        text = self.texts[index]
        label = self.labels[index]

        inputs = self.tokenizer.__call__(text,
                                        None,
                                        add_special_tokens=True,
                                        max_length=self.max_len,
                                        padding="max_length",
                                        truncation=True,
                                        )
        ids = inputs["input_ids"]
        mask = inputs["attention_mask"]

        return {
            "ids": torch.tensor(ids, dtype=torch.long),
            "mask": torch.tensor(mask, dtype=torch.long),
            "labels": torch.tensor(label, dtype=torch.long)
        }

**Classes subsets:**

In [10]:
def get_classes(data, min_class_ex=0, max_class_ex=50000):
    ohe_data = one_hot_encoder(data)
    indices = list(ohe_data.sum() > min_class_ex) * (ohe_data.sum() < max_class_ex)
    return list(ohe_data.columns[indices])

print(get_classes(train, 50))

['1005', '1015', '1019', '1021', '1025', '1026', '1031', '1035', '1048', '1052', '1069', '1074', '1076', '1085', '1086', '1088', '1091', '1092', '11', '1102', '1104', '1115', '1116', '1117', '1118', '1119', '1120', '1129', '1134', '1136', '114', '1144', '1158', '1159', '1164', '1182', '1188', '12', '1201', '1206', '1224', '1233', '1234', '1250', '1252', '1255', '1258', '1261', '1264', '1270', '1277', '1284', '1294', '13', '1308', '1309', '1318', '1329', '133', '1338', '1339', '1355', '1362', '1368', '1372', '1374', '1377', '1391', '1394', '1406', '1417', '1425', '1442', '1445', '1460', '1474', '1476', '1486', '1501', '1504', '1505', '1509', '1511', '1518', '1519', '1520', '1524', '1532', '1542', '1552', '1561', '1562', '1565', '1567', '1570', '1571', '1590', '1591', '1595', '1596', '1598', '1602', '1603', '1604', '1605', '1608', '1609', '161', '1616', '1631', '1637', '1638', '164', '1642', '1644', '1647', '1654', '1662', '1667', '1684', '1686', '1707', '1708', '171', '1712', '1729', '1

In [11]:
def get_data(train, val, test, min_class_ex=0,  max_class_ex=58000, classes=None):
    """
    if classes == None 
        gets ohe_data for classes with <=max_class_ex, >=min_class_ex example per class (in train)
    else
        gets ohe_data for given classes
    """
    data = pd.concat([train, val, test], ignore_index=True).fillna(0)

    if classes is None:
        classes = get_classes(train, min_class_ex, max_class_ex)

    n_classes = len(classes)

    ohe_data = one_hot_encoder(data, classes)

    # delete zero-label examples
    # 
    # zero_labels = np.array(ohe_data.sum(axis=1) < 1)
    # zero_labels_idx = np.array(range(len(zero_labels)))[zero_labels]
    # ohe_data = ohe_data.drop(zero_labels_idx).reset_index(drop=True)

    # add new label = zero_lables
    # 
    ohe_data["zero labels"] = [0] * len(ohe_data)
    zero_labels = np.array(ohe_data.sum(axis=1) < 1)
    ohe_data["zero labels"][zero_labels] = 1

    ohe_data.columns = list(range(n_classes + 1))
    ohe_data.insert(0, "text", data["text"])

    n_samples = ohe_data.shape[0]

    ohe_train = ohe_data.iloc[:int(0.8 * n_samples)]
    ohe_val = ohe_data.iloc[int(0.8 * n_samples):int(0.9 * n_samples)]
    ohe_test = ohe_data.iloc[int(0.9 * n_samples):]

    return ohe_train, ohe_val, ohe_test, n_classes

In [12]:
ex_train, _, _ , _ = get_data(train[:10], val[:10], test[:10], classes=['1026', '1048'])
print(ex_train)

                                                 text  0  1  2
0   Commission Implementing Regulation (EU) No 727...  0  0  1
1   Regulation (EEC) No 2481/75 of the Council of ...  0  0  1
2   2010/8/EU, Euratom: Commission Decision of 22 ...  0  0  1
3   82/211/EEC: Commission Decision of 17 March 19...  0  0  1
4   96/84/Euratom, ECSC, EC: Commission Decision o...  1  1  0
5   Commission Regulation (EC) No 1298/98 of 23 Ju...  0  0  1
6   Commission Regulation (EEC) No 2503/86 of 5 Au...  0  0  1
7   Commission Regulation (EC) No 2317/2001 of 29 ...  0  0  1
8   93/422/EEC: Commission Decision of 22 June 199...  0  0  1
9   Commission Regulation (EEC) No 1233/88 of 4 Ma...  0  0  1
10  Council Regulation (EC) No 1400/1999 of 24 Jun...  0  0  1
11  Commission Implementing Regulation (EU) No 381...  0  0  1
12  Council Joint Action 2007/732/CFSP of 13 Novem...  0  0  1
13  Commission Regulation (EEC) No 2239/91 of 26 J...  0  0  1
14  Commission Regulation (EC) No 2007/95 of 18 Au...  

  "unknown class(es) {0} will be ignored".format(sorted(unknown, key=str))


**Metrics**

In [13]:
def rp_k_metric(preds, labels, k=5):
    idx = np.argpartition(preds, kth=-k, axis=-1)[:,-k:]
    div_k = np.clip(np.sum(labels, axis=1), 0, k)
    N = labels.shape[0]

    return np.sum(np.sum(np.take_along_axis(labels, idx, axis=-1), axis=-1) / div_k) / N

def ranking_rprecision_score(y_true, y_score, k=5):
    """Precision at rank k
    Parameters
    ----------
    y_true : array-like, shape = [n_samples]
        Ground truth (true relevance labels).
    y_score : array-like, shape = [n_samples]
        Predicted scores.
    k : int
        Rank.
    Returns
    -------
    precision @k : float
    """
    unique_y = np.unique(y_true)

    if len(unique_y) == 1:
        return ValueError("The score cannot be approximated.")
    elif len(unique_y) > 2:
        raise ValueError("Only supported for two relevance levels.")

    pos_label = unique_y[1]
    n_pos = np.sum(y_true == pos_label)

    order = np.argsort(y_score)[::-1]
    y_true = np.take(y_true, order[:k])
    n_relevant = np.sum(y_true == pos_label)

    # Divide by min(n_pos, k) such that the best achievable score is always 1.0.
    return float(n_relevant) / min(k, n_pos)

def mean_rprecision_k(y_true, y_score, k=5):
    """Mean precision at rank k
    Parameters
    ----------
    y_true : array-like, shape = [n_samples]
        Ground truth (true relevance labels).
    y_score : array-like, shape = [n_samples]
        Predicted scores.
    k : int
        Rank.
    Returns
    -------
    mean precision @k : float
    """

    p_ks = []
    for y_t, y_s in zip(y_true, y_score):
        if np.sum(y_t == 1):
            p_ks.append(ranking_rprecision_score(y_t, y_s, k=k))

    return np.mean(p_ks)


def log_metrics(preds, labels):
    preds = torch.stack(preds)
    preds = preds.cpu().detach().numpy()
    labels = torch.stack(labels)
    labels = labels.cpu().detach().numpy()
    
    precision_micro = metrics.average_precision_score(labels, preds, average="micro")
    rp_k = mean_rprecision_k(labels, preds, 5)

    fpr_micro, tpr_micro, _ = metrics.roc_curve(labels.ravel(), preds.ravel())
    auc_micro = metrics.auc(fpr_micro, tpr_micro)
    
    f1_score_micro = metrics.f1_score(labels, np.array(preds) > 0.2, average='micro')
    f1_score_macro = metrics.f1_score(labels, np.array(preds) > 0.2, average='macro')
    
    return {
        "precision_micro": precision_micro,
        "RP@K": rp_k,
        "auc": auc_micro,
        "f1_score_micro": f1_score_micro,
        "f1_score_macro": f1_score_macro
    }

**Train part:**

In [14]:
sweep_config = {
    'method': 'grid', #grid, random, bayesian
    'metric': {
      'name': 'RP@K',
      'goal': 'maximize'   
    },
    'parameters': {

        'learning_rate': {
            'values': [1e-4]
        },
        'batch_size': {
            'values': [32]
        },
        'epochs':{'value': 5},
        'dropout':{
            'values': [0.4]
        },
        'tokenizer_max_len': {'value': 100},
    }
}

sweep_defaults = {
    'learning_rate': 1e-4,
    'batch_size': 32,
    'epochs': 5,
    'dropout': 0.3,
    'tokenizer_max_len': 100
}

sweep_id = wandb.sweep(sweep_config, project='bhaavnaye')

Create sweep with ID: r5kh1fxj
Sweep URL: https://wandb.ai/arrehova/bhaavnaye/sweeps/r5kh1fxj


In [15]:
def ret_optimizer(model):
    '''
    Taken from Abhishek Thakur's Tez library example: 
    https://github.com/abhishekkrthakur/tez/blob/main/examples/text_classification/binary.py
    '''
    param_optimizer = list(model.named_parameters())
    no_decay = ["bias", "LayerNorm.bias"]
    optimizer_parameters = [
        {
            "params": [
                p for n, p in param_optimizer if not any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.001,
        },
        {
            "params": [
                p for n, p in param_optimizer if any(nd in n for nd in no_decay)
            ],
            "weight_decay": 0.0,
        },
    ]
    opt = AdamW(optimizer_parameters, lr=wandb.config.learning_rate)
    return opt

def ret_scheduler(optimizer, num_train_steps):
    sch = get_linear_schedule_with_warmup(
        optimizer, num_warmup_steps=0, num_training_steps=num_train_steps)
    return sch

def loss_fn(outputs, labels):
    if labels is None:
        return None
    return nn.BCEWithLogitsLoss()(outputs, labels.float())

In [16]:
class Classifier(nn.Module):
    def __init__(self, n_train_steps, n_classes, do_prob, bert_model):
        super(Classifier, self).__init__()
        self.bert = bert_model
        self.dropout = nn.Dropout(do_prob)
        self.out = nn.Linear(768, n_classes)
        self.n_train_steps = n_train_steps
        self.step_scheduler_after = "batch"

    def forward(self, ids, mask):
        output_1 = self.bert(ids, attention_mask=mask)["pooler_output"]
        output_2 = self.dropout(output_1)
        output = self.out(output_2)
        return output

In [17]:
tokenizer = transformers.SqueezeBertTokenizer.from_pretrained("squeezebert/squeezebert-uncased", do_lower_case=True)

def build_dataset(tokenizer_max_len, train, val, n_labels):
    train_dataset = Dataset(train.text.tolist(), train[range(n_labels)].values.tolist(), tokenizer, tokenizer_max_len)
    val_dataset = Dataset(val.text.tolist(), val[range(n_labels)].values.tolist(), tokenizer, tokenizer_max_len)
    
    return train_dataset, val_dataset 

def build_dataloader(train_dataset, val_dataset, batch_size):
    train_data_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
    val_data_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=True, num_workers=1)

    return train_data_loader, val_data_loader

def ret_model(n_train_steps, do_prob, n_labels):
  model = Classifier(n_train_steps, n_labels, do_prob, bert_model=bert_model)
  return model

Downloading:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/500 [00:00<?, ?B/s]

In [18]:
bert_model = transformers.SqueezeBertModel.from_pretrained("squeezebert/squeezebert-uncased")

Downloading:   0%|          | 0.00/98.7M [00:00<?, ?B/s]

Some weights of the model checkpoint at squeezebert/squeezebert-uncased were not used when initializing SqueezeBertModel: ['cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing SqueezeBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SqueezeBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [19]:
def train_fn(data_loader, model, optimizer, device, scheduler):
    '''
        Modified from Abhishek Thakur's BERT example: 
        https://github.com/abhishekkrthakur/bert-sentiment/blob/master/src/engine.py
    '''

    train_loss = 0.0
    model.train()
    for bi, d in tqdm(enumerate(data_loader), total=len(data_loader)):
        ids = d["ids"]
        mask = d["mask"]
        targets = d["labels"]

        ids = ids.to(device, dtype=torch.long)
        mask = mask.to(device, dtype=torch.long)
        targets = targets.to(device, dtype=torch.float)

        optimizer.zero_grad()
        outputs = model(ids=ids, mask=mask)

        loss = loss_fn(outputs, targets)
        loss.backward()
        train_loss += loss.item()
        optimizer.step()
        scheduler.step()
    return train_loss
    

def eval_fn(data_loader, model, device):
    '''
        Modified from Abhishek Thakur's BERT example: 
        https://github.com/abhishekkrthakur/bert-sentiment/blob/master/src/engine.py
    '''
    eval_loss = 0.0
    model.eval()
    fin_targets = []
    fin_outputs = []
    with torch.no_grad():
        for bi, d in tqdm(enumerate(data_loader), total=len(data_loader)):
            ids = d["ids"]
            mask = d["mask"]
            targets = d["labels"]

            ids = ids.to(device, dtype=torch.long)
            mask = mask.to(device, dtype=torch.long)
            targets = targets.to(device, dtype=torch.float)

            outputs = model(ids=ids, mask=mask)
            loss = loss_fn(outputs, targets)
            eval_loss += loss.item()
            fin_targets.extend(targets)
            fin_outputs.extend(torch.sigmoid(outputs))
    return eval_loss, fin_outputs, fin_targets

In [20]:
def trainer(config=None):
    with wandb.init(config=config):
        config = wandb.config

        train_dataset, val_dataset = build_dataset(config.tokenizer_max_len, train, val, n_labels)
        train_data_loader, val_data_loader = build_dataloader(train_dataset, val_dataset, config.batch_size)
        print("Length of Train Dataloader: ", len(train_data_loader))
        print("Length of Valid Dataloader: ", len(val_data_loader))

        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        n_train_steps = int(len(train_dataset) / config.batch_size * 10)

        model = ret_model(n_train_steps, config.dropout, n_labels)
        optimizer = ret_optimizer(model)
        scheduler = ret_scheduler(optimizer, n_train_steps)
        model.to(device)
        model = nn.DataParallel(model)
        wandb.watch(model)
        
        n_epochs = config.epochs

        best_val_loss = 100
        for epoch in tqdm(range(n_epochs)):
            train_loss = train_fn(train_data_loader, model, optimizer, device, scheduler)
            eval_loss, preds, labels = eval_fn(val_data_loader, model, device)
          
            print(log_metrics(preds, labels))

            # # check first 5 labels:
            # print(np.argpartition(torch.stack(preds).cpu().detach().numpy(), kth=-5, axis=-1)[:20,-5:])

            avg_train_loss, avg_val_loss = train_loss / len(train_data_loader), eval_loss / len(val_data_loader)
            wandb.log({
                "epoch": epoch + 1,
                "train_loss": avg_train_loss,
                "val_loss": avg_val_loss
            })
            print("Average Train loss: ", avg_train_loss)
            print("Average Valid loss: ", avg_val_loss)

            if avg_val_loss < best_val_loss:
                best_val_loss = avg_val_loss
                torch.save(model.state_dict(), "./best_model.pt")  
                print("Model saved as current val_loss is: ", best_val_loss)    

**Most frequent classes:**

In [21]:
train, val, test, n_labels = get_data(train, val, test, 450)
print(n_labels)

108


  "unknown class(es) {0} will be ignored".format(sorted(unknown, key=str))


**Few-shot:**

Let's try to delete examples with 0 labels:

In [54]:
def get_data(train, val, test, min_class_ex=0,  max_class_ex=58000, classes=None):
    """
    if classes == None 
        gets ohe_data for classes with <=max_class_ex, >=min_class_ex example per class (in train)
    else
        gets ohe_data for given classes
    """
    data = pd.concat([train, val, test], ignore_index=True).fillna(0)

    if classes is None:
        classes = get_classes(train, min_class_ex, max_class_ex)

    n_classes = len(classes)

    ohe_data = one_hot_encoder(data, classes)

    # delete zero-label examples
    # 
    zero_labels = np.array(ohe_data.sum(axis=1) < 1)
    zero_labels_idx = np.array(range(len(zero_labels)))[zero_labels]
    ohe_data = ohe_data.drop(zero_labels_idx).reset_index(drop=True)

    # add new label = zero_lables
    # 
    # ohe_data["zero labels"] = [0] * len(ohe_data)
    # zero_labels = np.array(ohe_data.sum(axis=1) < 1)
    # ohe_data["zero labels"][zero_labels] = 1

    ohe_data.columns = list(range(n_classes))
    ohe_data.insert(0, "text", data["text"])

    n_samples = ohe_data.shape[0]

    ohe_train = ohe_data.iloc[:int(0.8 * n_samples)]
    ohe_val = ohe_data.iloc[int(0.8 * n_samples):int(0.9 * n_samples)]
    ohe_test = ohe_data.iloc[int(0.9 * n_samples):]

    return ohe_train, ohe_val, ohe_test, n_classes

In [22]:
train = load_dataset('eurlex', 'eurlex57k', split='train').to_pandas()
test = load_dataset('eurlex', 'eurlex57k', split='test').to_pandas()
val = load_dataset('eurlex', 'eurlex57k', split='validation').to_pandas()

Reusing dataset eurlex (/root/.cache/huggingface/datasets/eurlex/eurlex57k/1.1.0/d2fdeaa4fcb5f41394d2ed0317c8541d7f9be85d2d601b9fa586c8b461bc3a34)
Reusing dataset eurlex (/root/.cache/huggingface/datasets/eurlex/eurlex57k/1.1.0/d2fdeaa4fcb5f41394d2ed0317c8541d7f9be85d2d601b9fa586c8b461bc3a34)
Reusing dataset eurlex (/root/.cache/huggingface/datasets/eurlex/eurlex57k/1.1.0/d2fdeaa4fcb5f41394d2ed0317c8541d7f9be85d2d601b9fa586c8b461bc3a34)


In [73]:
# download dataset again
train, val, test, n_labels = get_data(train, val, test, 20, 23) 
print(train.shape)

(1020, 52)


  "unknown class(es) {0} will be ignored".format(sorted(unknown, key=str))


In [74]:
print(train.iloc[:5])

                                                text  0  1  2  3  4  5  6  7  \
0  1.7.2014 EN Official Journal of the European U...  0  0  0  0  0  0  0  0   
1  REGULATION (EEC) No 2481/75 OF THE COUNCIL  of...  0  0  0  0  0  0  0  0   
2  7.1.2010 EN Official Journal of the European U...  1  0  0  0  0  0  0  0   
3  COMMISSION  DECISION\nof 17 March 1982\nestabl...  0  0  0  0  0  0  0  0   
4  COMMISSION DECISION of 8 January 1996 adjustin...  0  0  0  0  0  0  0  0   

   8  ...  41  42  43  44  45  46  47  48  49  50  
0  0  ...   0   0   0   0   0   0   0   0   0   0  
1  0  ...   0   0   0   0   0   0   0   0   0   0  
2  0  ...   0   0   0   0   0   0   0   0   0   0  
3  0  ...   0   0   0   0   0   0   0   0   0   0  
4  0  ...   0   0   0   0   0   0   0   0   0   0  

[5 rows x 52 columns]


In [79]:
wandb.agent(sweep_id, function=trainer, count=6)

[34m[1mwandb[0m: Agent Starting Run: pu4m72dq with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0001
[34m[1mwandb[0m: 	tokenizer_max_len: 100


Length of Train Dataloader:  32
Length of Valid Dataloader:  4




  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


{'precision_micro': 0.02069871603915753, 'RP@K': 0.109375, 'auc': 0.5201799942093664, 'f1_score_micro': 0.01556420233463035, 'f1_score_macro': 0.0006033182503770739}
Average Train loss:  0.3731649946421385
Average Valid loss:  0.17541339248418808
Model saved as current val_loss is:  0.17541339248418808


  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


{'precision_micro': 0.02109252060509685, 'RP@K': 0.109375, 'auc': 0.5250681126290324, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.14759928709827363
Average Valid loss:  0.11425657942891121
Model saved as current val_loss is:  0.11425657942891121


  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


{'precision_micro': 0.020829976393759046, 'RP@K': 0.109375, 'auc': 0.5186541986332651, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.11532237031497061
Average Valid loss:  0.10308839567005634
Model saved as current val_loss is:  0.10308839567005634


  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


{'precision_micro': 0.020916177295648264, 'RP@K': 0.109375, 'auc': 0.525884010461906, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.10772639513015747
Average Valid loss:  0.0996005441993475
Model saved as current val_loss is:  0.0996005441993475


  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


{'precision_micro': 0.020455944142535785, 'RP@K': 0.109375, 'auc': 0.517702620685645, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.10498335678130388
Average Valid loss:  0.09836448729038239
Model saved as current val_loss is:  0.09836448729038239


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▃▅▆█
train_loss,█▂▁▁▁
val_loss,█▂▁▁▁

0,1
epoch,5.0
train_loss,0.10498
val_loss,0.09836


[34m[1mwandb[0m: Agent Starting Run: 4lko14um with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	tokenizer_max_len: 100


Length of Train Dataloader:  32
Length of Valid Dataloader:  4




  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


{'precision_micro': 0.021571013059438746, 'RP@K': 0.1171875, 'auc': 0.4964583855762347, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.14975109416991472
Average Valid loss:  0.09769993275403976
Model saved as current val_loss is:  0.09769993275403976


  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

{'precision_micro': 0.017319187178322298, 'RP@K': 0.03515625, 'auc': 0.44638030893877556, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.1026195811573416
Average Valid loss:  0.09813623316586018


  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


{'precision_micro': 0.019213890057216326, 'RP@K': 0.0703125, 'auc': 0.49710347183607906, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.10295050335116684
Average Valid loss:  0.09740270860493183
Model saved as current val_loss is:  0.09740270860493183


  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


{'precision_micro': 0.02053976039785209, 'RP@K': 0.1171875, 'auc': 0.5045531581363851, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.10265758982859552
Average Valid loss:  0.09739855863153934
Model saved as current val_loss is:  0.09739855863153934


  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


{'precision_micro': 0.019954700972527568, 'RP@K': 0.0625, 'auc': 0.5113450381660894, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.10214190813712776
Average Valid loss:  0.09724537096917629
Model saved as current val_loss is:  0.09724537096917629


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▃▅▆█
train_loss,█▁▁▁▁
val_loss,▅█▂▂▁

0,1
epoch,5.0
train_loss,0.10214
val_loss,0.09725


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 3vyov3ip with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.01
[34m[1mwandb[0m: 	tokenizer_max_len: 100


Length of Train Dataloader:  32
Length of Valid Dataloader:  4




  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


{'precision_micro': 0.019579531678617992, 'RP@K': 0.0703125, 'auc': 0.4998449370117182, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.16921661933884025
Average Valid loss:  0.12530707009136677
Model saved as current val_loss is:  0.12530707009136677


  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


{'precision_micro': 0.01824085265281264, 'RP@K': 0.0703125, 'auc': 0.46419135257325816, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.13280973373912275
Average Valid loss:  0.12342583574354649
Model saved as current val_loss is:  0.12342583574354649


  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

{'precision_micro': 0.02429679565985736, 'RP@K': 0.171875, 'auc': 0.5050147128124429, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.14760181843303144
Average Valid loss:  0.1390257142484188


  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


{'precision_micro': 0.02074263757995655, 'RP@K': 0.0625, 'auc': 0.5339933201772079, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.1277790789026767
Average Valid loss:  0.10713105089962482
Model saved as current val_loss is:  0.10713105089962482


  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


{'precision_micro': 0.020892917742477958, 'RP@K': 0.109375, 'auc': 0.5117333013515919, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.11106181051582098
Average Valid loss:  0.09821057505905628
Model saved as current val_loss is:  0.09821057505905628


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▃▅▆█
train_loss,█▄▅▃▁
val_loss,▆▅█▃▁

0,1
epoch,5.0
train_loss,0.11106
val_loss,0.09821


[34m[1mwandb[0m: Agent Starting Run: 9pbpum71 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0002
[34m[1mwandb[0m: 	tokenizer_max_len: 100


Length of Train Dataloader:  32
Length of Valid Dataloader:  4




  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


{'precision_micro': 0.024558033463733163, 'RP@K': 0.12109375, 'auc': 0.5555809955770705, 'f1_score_micro': 0.04161248374512354, 'f1_score_macro': 0.004776164812087578}
Average Train loss:  0.45021185744553804
Average Valid loss:  0.2194340080022812
Model saved as current val_loss is:  0.2194340080022812


  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


{'precision_micro': 0.024506790598166437, 'RP@K': 0.12109375, 'auc': 0.5493512188798879, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.1457097448874265
Average Valid loss:  0.10185194201767445
Model saved as current val_loss is:  0.10185194201767445


  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


{'precision_micro': 0.026997435463475775, 'RP@K': 0.1171875, 'auc': 0.5406767772580744, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.10295572830364108
Average Valid loss:  0.09703612700104713
Model saved as current val_loss is:  0.09703612700104713


  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


{'precision_micro': 0.02606392827115941, 'RP@K': 0.15234375, 'auc': 0.5314305408669717, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.10102048283442855
Average Valid loss:  0.09702195040881634
Model saved as current val_loss is:  0.09702195040881634


  0%|          | 0/32 [00:00<?, ?it/s]

  0%|          | 0/4 [00:00<?, ?it/s]

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


{'precision_micro': 0.025159507418744717, 'RP@K': 0.1484375, 'auc': 0.5074787606106089, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.10116359312087297
Average Valid loss:  0.0971075389534235


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▃▅▆█
train_loss,█▂▁▁▁
val_loss,█▁▁▁▁

0,1
epoch,5.0
train_loss,0.10116
val_loss,0.09711


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.


In [84]:
train, val, test, n_labels = get_data(train, val, test, 10, 50) 
print(train.shape)

  "unknown class(es) {0} will be ignored".format(sorted(unknown, key=str))


(15387, 1016)


In [85]:
wandb.agent(sweep_id, function=trainer, count=6)

[34m[1mwandb[0m: Agent Starting Run: w251382c with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0002
[34m[1mwandb[0m: 	tokenizer_max_len: 100


Length of Train Dataloader:  481
Length of Valid Dataloader:  61




  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/481 [00:00<?, ?it/s]

  0%|          | 0/61 [00:00<?, ?it/s]

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


{'precision_micro': 0.0022698325038385783, 'RP@K': 0.0109291038308199, 'auc': 0.6166365448617698, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.03591351433155628
Average Valid loss:  0.010999889693177138
Model saved as current val_loss is:  0.010999889693177138


  0%|          | 0/481 [00:00<?, ?it/s]

  0%|          | 0/61 [00:00<?, ?it/s]

{'precision_micro': 0.0022404809574280765, 'RP@K': 0.012064482579303173, 'auc': 0.6163490843325227, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.01143010078180047
Average Valid loss:  0.011049766414111754


  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


  0%|          | 0/481 [00:00<?, ?it/s]

  0%|          | 0/61 [00:00<?, ?it/s]

{'precision_micro': 0.0023294021434502096, 'RP@K': 0.012844513780551221, 'auc': 0.6207136022650326, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.011433771402882934
Average Valid loss:  0.011028792525901169


  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


  0%|          | 0/481 [00:00<?, ?it/s]

  0%|          | 0/61 [00:00<?, ?it/s]

{'precision_micro': 0.0022880780769357293, 'RP@K': 0.014621251516727336, 'auc': 0.6161376391894536, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.011423408847666208
Average Valid loss:  0.011049921364813555


  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


  0%|          | 0/481 [00:00<?, ?it/s]

  0%|          | 0/61 [00:00<?, ?it/s]

  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


{'precision_micro': 0.0022557820908820497, 'RP@K': 0.01001040041601664, 'auc': 0.6165743276242858, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.011412783866659148
Average Valid loss:  0.010981977276015477
Model saved as current val_loss is:  0.010981977276015477


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▃▅▆█
train_loss,█▁▁▁▁
val_loss,▃█▆█▁

0,1
epoch,5.0
train_loss,0.01141
val_loss,0.01098


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.


In [97]:
train, val, test, n_labels = get_data(train, val, test, 50, 100) 
print(train.shape)

  "unknown class(es) {0} will be ignored".format(sorted(unknown, key=str))


(45600, 300)


In [93]:
wandb.agent(sweep_id, function=trainer, count=6)

[34m[1mwandb[0m: Agent Starting Run: m6ejzshn with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0002
[34m[1mwandb[0m: 	tokenizer_max_len: 100


Length of Train Dataloader:  491
Length of Valid Dataloader:  62




  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/491 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

{'precision_micro': 0.005122387911615442, 'RP@K': 0.02160808286636101, 'auc': 0.5382634892071323, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.04452804915318664
Average Valid loss:  0.0288164762539729
Model saved as current val_loss is:  0.0288164762539729


  0%|          | 0/491 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

{'precision_micro': 0.004895687272015838, 'RP@K': 0.022941076583460687, 'auc': 0.5272296966210236, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.02973408789170978
Average Valid loss:  0.028788607447378097
Model saved as current val_loss is:  0.028788607447378097


  0%|          | 0/491 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

{'precision_micro': 0.005088845244079, 'RP@K': 0.019782645610460177, 'auc': 0.5430885328779391, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.029716606875938457
Average Valid loss:  0.02879960110951816


  0%|          | 0/491 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

{'precision_micro': 0.005024388703545092, 'RP@K': 0.027381558838512482, 'auc': 0.530460275606385, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.029688870291512025
Average Valid loss:  0.02885735458544185


  0%|          | 0/491 [00:00<?, ?it/s]

  0%|          | 0/62 [00:00<?, ?it/s]

{'precision_micro': 0.005067241629741507, 'RP@K': 0.024002377313635588, 'auc': 0.5332889303355492, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.029650934601995464
Average Valid loss:  0.02881615641977518


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▃▅▆█
train_loss,█▁▁▁▁
val_loss,▄▁▂█▄

0,1
epoch,5.0
train_loss,0.02965
val_loss,0.02882


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.


In [99]:
train, val, test, n_labels = get_data(train, val, test, 50, 100) 
print(train.shape)

  "unknown class(es) {0} will be ignored".format(sorted(unknown, key=str))


(45600, 300)


In [100]:
wandb.agent(sweep_id, function=trainer, count=6)

[34m[1mwandb[0m: Agent Starting Run: co26nz39 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dropout: 0.4
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.0002
[34m[1mwandb[0m: 	tokenizer_max_len: 100


Length of Train Dataloader:  1425
Length of Valid Dataloader:  179




  0%|          | 0/5 [00:00<?, ?it/s]

  0%|          | 0/1425 [00:00<?, ?it/s]

  0%|          | 0/179 [00:00<?, ?it/s]

{'precision_micro': 0.001829351795325693, 'RP@K': 0.034903879162375556, 'auc': 0.540223937597297, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.01757639802976005
Average Valid loss:  0.011532868027125158
Model saved as current val_loss is:  0.011532868027125158


  0%|          | 0/1425 [00:00<?, ?it/s]

  0%|          | 0/179 [00:00<?, ?it/s]

{'precision_micro': 0.001666432983559426, 'RP@K': 0.017370408513559903, 'auc': 0.5300842888585883, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.011992755673596995
Average Valid loss:  0.011523154712485035
Model saved as current val_loss is:  0.011523154712485035


  0%|          | 0/1425 [00:00<?, ?it/s]

  0%|          | 0/179 [00:00<?, ?it/s]

{'precision_micro': 0.0016594268226375153, 'RP@K': 0.02035702025403364, 'auc': 0.5213505554980138, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.011972153654583453
Average Valid loss:  0.011590859980202754


  0%|          | 0/1425 [00:00<?, ?it/s]

  0%|          | 0/179 [00:00<?, ?it/s]

{'precision_micro': 0.0016681962664606286, 'RP@K': 0.012778922073463782, 'auc': 0.527970047318492, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.011952141532184261
Average Valid loss:  0.011598269135155324


  0%|          | 0/1425 [00:00<?, ?it/s]

  0%|          | 0/179 [00:00<?, ?it/s]

{'precision_micro': 0.001748655212298854, 'RP@K': 0.017953999313422588, 'auc': 0.5369884726706986, 'f1_score_micro': 0.0, 'f1_score_macro': 0.0}
Average Train loss:  0.01191708529204653
Average Valid loss:  0.011426965084951892
Model saved as current val_loss is:  0.011426965084951892


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▃▅▆█
train_loss,█▁▁▁▁
val_loss,▅▅██▁

0,1
epoch,5.0
train_loss,0.01192
val_loss,0.01143


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Sweep Agent: Exiting.
