In [None]:
pip install transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.28.1-py3-none-any.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m41.9 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.14.1-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m13.9 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m84.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.14.1 tokenizers-0.13.3 transformers-4.28.1


In [None]:
# Mount Google Drive at /content/drive; the dataset, checkpoint and log paths
# used further down all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import gc

import pandas as pd
import numpy as np

import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.optim import AdamW
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModel #AutoModelForSequenceClassification
from transformers import get_linear_schedule_with_warmup
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score, cohen_kappa_score

def mean_pooling(model_output, attention_mask):
    """Average token vectors over dimension 1, counting only unmasked positions.

    Args:
        model_output: Indexable object whose element ``0`` is the tensor of
            token embeddings (assumed ``(batch, seq_len, dim)`` -- TODO confirm
            against the caller).
        attention_mask: Tensor that, once a trailing axis is added, expands to
            the shape of the token embeddings; its values weight each position
            (zero removes a position from the average).

    Returns:
        The mask-weighted sum over dimension 1 divided by the summed mask. The
        divisor is clamped to at least ``1e-9`` so a fully masked row produces
        zeros instead of a division by zero.
    """
    hidden = model_output[0]
    weights = attention_mask.unsqueeze(-1).expand(hidden.size()).float()
    masked_total = (hidden * weights).sum(dim=1)
    # Guard against rows whose mask is entirely zero.
    token_count = weights.sum(dim=1).clamp(min=1e-9)
    return masked_total / token_count

In [None]:
# Sector names and company identifiers (the latter look like exchange tickers).
# NOTE(review): the following cell assigns `sectors` and `companies` again, so
# on a top-to-bottom run the values defined here are replaced.
sectors = ['finance', 'gold', 'ferrous_metallurgy', 'oil_gas', 'non_ferrous_metallurgy', 'electrical networks', 'telecom', 'it', 'real_estate']
companies = ['ALRS', 'AFLT', 'VTBR', 'GAZP', 'GMKN', 'LSRG', 'DSKY', 'IRAO', 'LKOH', 'MAGN', 'MOEX', 'CBOM', 'MTSS', 'ROSN', 'NLMK', 'NVTK', 'PIKK', 'PLZL',
              'RTKM', 'RUAL', 'HYDR', 'SBER', 'CHMF', 'AFKS', 'SNGS', 'TATN', 'TCSG', 'TRNFP', 'POGR', 'PHOR', 'FEES', 'HHRU', 'YNDX']

In [None]:
# Re-assigns `sectors` (same values) and `companies`. Compared with the list in
# the previous cell, this `companies` list leaves out LSRG, NLMK, PLZL, POGR,
# FEES and HHRU.
sectors = ['finance', 'gold', 'ferrous_metallurgy', 'oil_gas', 'non_ferrous_metallurgy', 'electrical networks', 'telecom', 'it', 'real_estate']
companies = ['ALRS', 'AFLT', 'VTBR', 'GAZP', 'GMKN', 'DSKY', 'IRAO', 'LKOH', 'MAGN', 'MOEX', 'CBOM', 'MTSS',
             'ROSN', 'NVTK', 'PIKK', 'RTKM', 'RUAL', 'HYDR', 'SBER', 'CHMF', 'AFKS', 'SNGS', 'TATN',
             'TCSG', 'TRNFP', 'PHOR', 'YNDX']

## BERT, как в соревновании: один таймфрейм

In [None]:
class NewsDataset(Dataset):
    """Map-style dataset of pre-tokenised texts paired with integer class labels.

    Every text is tokenised once, at construction time, and padded/truncated to
    ``max_length`` tokens, so ``__getitem__`` is a plain lookup.
    """

    def __init__(self, texts, targets, text_tokenizer=None, max_length=512):
        """Tokenise ``texts`` and store ``targets`` as integers.

        Args:
            texts: Sequence of strings to encode.
            targets: Array with one target per text; it must support
                ``.astype(int)`` (e.g. a NumPy array).
            text_tokenizer: Callable used to encode each text. It is invoked as
                ``text_tokenizer(text, padding='max_length', max_length=...,
                truncation=True, return_tensors="pt")``. When ``None`` (the
                default) the notebook-level ``tokenizer`` is used, so the
                original two-argument call keeps working.
            max_length: Length every encoded text is padded/truncated to.

        Raises:
            ValueError: If ``texts`` and ``targets`` differ in length, which
                would otherwise silently misalign texts and labels.
        """
        if len(texts) != len(targets):
            raise ValueError(
                f'texts and targets must have the same length, '
                f'got {len(texts)} and {len(targets)}'
            )

        # Resolve the global lazily so an explicit tokenizer never requires it.
        encode = tokenizer if text_tokenizer is None else text_tokenizer

        self.labels = targets.astype(int)
        self.texts = [
            encode(text,
                   padding='max_length', max_length=max_length, truncation=True,
                   return_tensors="pt")
            for text in texts
        ]

    def __len__(self):
        """Return the number of stored labels."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return ``(encoding, label + 1)`` for position ``idx``.

        The ``+ 1`` turns the stored target into the value fed to the loss.
        Presumably targets lie in ``{-1, 0, 1}`` and become class indices
        ``{0, 1, 2}`` -- confirm against the data.
        """
        encoding = self.texts[idx]
        class_index = self.labels[idx] + 1

        return encoding, class_index

In [None]:
# Notebook-level tokenizer, loaded from the same checkpoint name that
# BertClassifier passes to AutoModel; NewsDataset reads this global.
tokenizer = AutoTokenizer.from_pretrained("DeepPavlov/distilrubert-base-cased-conversational")

Downloading (…)okenizer_config.json:   0%|          | 0.00/24.0 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/538 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/1.40M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [None]:
class BertClassifier(nn.Module):
    """Pretrained encoder with a small fully connected head producing 3 logits.

    The head consumes the concatenation of two sentence vectors taken from the
    encoder's first output: the vector at sequence position 0 and the
    attention-masked mean over all positions.
    """

    def __init__(self, freeze=False):
        """Load the encoder and build the classification head.

        Args:
            freeze: When true, gradients are disabled for every encoder
                parameter so that only the head is trained.
        """
        super().__init__()

        input_layer = 768   # encoder vector width expected by the head
        output_layer = 3    # number of logits / classes

        self.bert = AutoModel.from_pretrained("DeepPavlov/distilrubert-base-cased-conversational")

        self.fc1 = nn.Linear(input_layer*2, input_layer//2)
        # fc2 sees the two sentence vectors plus fc1's output (skip connection).
        self.fc2 = nn.Linear(input_layer*2 + input_layer//2, input_layer)
        self.fc3 = nn.Linear(input_layer, output_layer)

        self.drop1 = nn.Dropout(0.1)
        self.drop2 = nn.Dropout(0.1)

        self.act = nn.ReLU()

        if freeze:
            for param in self.bert.parameters():
                param.requires_grad = False

    def forward(self, input_ids, attention_mask):
        """Return the logits for a batch.

        Args:
            input_ids: Token ids passed to the encoder.
            attention_mask: Mask passed to the encoder and reused to weight the
                mean pooling.

        Returns:
            Tensor of logits with 3 values per input row.
        """
        outputs = self.bert(input_ids, attention_mask)
        last_hidden = outputs[0]

        # mean_pooling reads element 0 of whatever it is given, so it must get
        # the model output itself. The previous code passed outputs[1], which is
        # not the final hidden states (and, depending on the encoder
        # configuration, may not exist at all).
        sentence_embeddings = mean_pooling(outputs, attention_mask)

        embeddings = torch.cat([last_hidden[:, 0, :], sentence_embeddings], dim=1)
        embeddings1 = self.fc1(embeddings)
        embeddings = torch.cat([embeddings, embeddings1], dim=1)
        embeddings = self.act(embeddings)
        embeddings = self.drop1(embeddings)
        embeddings = self.fc2(embeddings)
        embeddings = self.act(embeddings)
        embeddings = self.drop2(embeddings)
        logits = self.fc3(embeddings)

        return logits

In [None]:
def calculate_metrics(labels, preds, preds_bin):
    """Compute accuracy, precision, recall, F1, ROC AUC and Cohen's kappa.

    Args:
        labels: 1-D array of true classes.
        preds: 2-D array of per-class scores, one row per sample; column ``k``
            must hold the score of class ``k``.
        preds_bin: 1-D array of predicted classes, in the same encoding as
            ``labels``.

    Returns:
        Tuple ``(acc, pre, rec, f1, auc, kappa)``.

    With at most two distinct classes across ``labels`` and ``preds_bin`` the
    result is exactly the original binary computation (positive-class
    precision/recall/F1, AUC from ``preds[:, 1]``). With more classes --
    where the binary defaults would raise -- precision/recall/F1 are
    macro-averaged and AUC is one-vs-rest; that path requires ``labels`` to be
    class indices ``0 .. preds.shape[1] - 1``.
    """
    acc = accuracy_score(labels, preds_bin)
    kappa = cohen_kappa_score(labels, preds_bin)

    observed_classes = np.union1d(labels, preds_bin)
    if len(observed_classes) <= 2:
        pre = precision_score(labels, preds_bin)
        rec = recall_score(labels, preds_bin)
        f1 = f1_score(labels, preds_bin)
        auc = roc_auc_score(labels, preds[:,1])
    else:
        pre = precision_score(labels, preds_bin, average='macro')
        rec = recall_score(labels, preds_bin, average='macro')
        f1 = f1_score(labels, preds_bin, average='macro')
        # Name the classes explicitly so the columns of `preds` are matched by
        # index rather than by whichever labels happen to be present.
        auc = roc_auc_score(labels, preds, multi_class='ovr',
                            labels=np.arange(preds.shape[1]))
    return acc, pre, rec, f1, auc, kappa

In [None]:
def train(epochs, train_dataloader, val_dataloader):
    """Fine-tune the notebook-level ``model`` and checkpoint its best epoch.

    The function operates on globals set up by the calling cell rather than on
    arguments: ``model``, ``device``, ``loss_function``, ``optimizer``,
    ``scheduler`` and ``col`` (the latter only names the checkpoint file).

    After every epoch the model is scored on ``val_dataloader``; whenever the
    validation accuracy improves, the weights are written to
    ``/content/drive/MyDrive/weights/dbert_tg_{col}_bin.pth``.

    Args:
        epochs: Number of passes over ``train_dataloader``.
        train_dataloader: Yields ``(encoding, labels)`` batches for training.
        val_dataloader: Yields batches in the same format for validation.

    Returns:
        None. Losses and validation metrics are printed once per epoch.
    """
    best_acc = -np.inf

    for e in range(epochs):
        model.train()

        train_loss = 0

        for batch in tqdm(train_dataloader):

            batch_data, batch_labels = batch

            # Drop the axis at position 1 of the batched encodings before they
            # are handed to the model.
            batch_inputs = batch_data['input_ids'].to(device).squeeze(1)
            batch_masks = batch_data['attention_mask'].to(device).squeeze(1)
            batch_labels = batch_labels.to(device)

            model.zero_grad()

            logits = model(batch_inputs, batch_masks)
            loss = loss_function(logits, batch_labels)

            loss.backward()

            train_loss += loss.item()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            # The scheduler is stepped once per batch, not once per epoch.
            scheduler.step()

        train_loss /= len(train_dataloader)

        model.eval()

        test_loss = 0

        all_test = []
        all_test_bin = []
        labels = []

        for batch in tqdm(val_dataloader):

            batch_data, batch_labels = batch

            batch_inputs = batch_data['input_ids'].to(device).squeeze(1)
            batch_masks = batch_data['attention_mask'].to(device).squeeze(1)
            batch_labels = batch_labels.to(device)

            with torch.no_grad():
                logits = model(batch_inputs, batch_masks)
                loss = loss_function(logits, batch_labels)
                preds = F.softmax(logits, dim=-1)

            test_loss += loss.item()

            # Keep predictions as class indices. `batch_labels` are the same
            # indices the loss consumes, so subtracting 1 from the predictions
            # only (as was done before) scored two different encodings
            # against each other.
            preds_bin = preds.argmax(dim=-1)

            all_test.append(preds.cpu().numpy())
            all_test_bin.append(preds_bin.cpu().numpy())
            labels.append(batch_labels.cpu().numpy())

        test_loss /= len(val_dataloader)
        all_test = np.concatenate(all_test, axis=0)
        all_test_bin = np.concatenate(all_test_bin, axis=0)
        labels = np.concatenate(labels, axis=0)

        acc, pre, rec, f1, auc, kappa = calculate_metrics(labels, all_test, all_test_bin)

        # Only the epoch with the best validation accuracy so far is kept.
        if acc > best_acc:
            best_acc = acc
            torch.save(model.state_dict(), f'/content/drive/MyDrive/weights/dbert_tg_{col}_bin.pth')

        print('Epoch: %d  |  Train Loss: %1.5f  |  Test Loss: %1.5f'%(e+1, train_loss, test_loss))

        print('Val acc: %1.4f'%(acc))
        print('Val pre: %1.4f'%(pre))
        print('Val rec: %1.4f'%(rec))
        print('Val f1: %1.4f'%(f1))
        print('Val auc: %1.4f'%(auc))
        print('Val kappa: %1.4f'%(kappa))

In [None]:
def evaluate(test_dataloader):
    """Score the checkpoint saved by ``train`` on ``test_dataloader``.

    Uses the notebook-level globals ``model``, ``device``, ``loss_function``
    and ``col``; the weights are read from
    ``/content/drive/MyDrive/weights/dbert_tg_{col}_bin.pth`` into ``model``.

    Args:
        test_dataloader: Yields ``(encoding, labels)`` batches.

    Returns:
        Tuple ``(acc, pre, rec, f1, auc, kappa)`` as produced by
        ``calculate_metrics``. The same values and the mean loss are printed.
    """
    # map_location lets a checkpoint written on a GPU runtime be restored when
    # the current runtime has no GPU.
    state_dict = torch.load(f'/content/drive/MyDrive/weights/dbert_tg_{col}_bin.pth',
                            map_location=device)
    model.load_state_dict(state_dict)
    model.eval()

    test_loss = 0

    all_test = []
    all_test_bin = []
    labels = []

    for batch in tqdm(test_dataloader):

        batch_data, batch_labels = batch

        # Drop the axis at position 1 of the batched encodings before they are
        # handed to the model.
        batch_inputs = batch_data['input_ids'].to(device).squeeze(1)
        batch_masks = batch_data['attention_mask'].to(device).squeeze(1)
        batch_labels = batch_labels.to(device)

        with torch.no_grad():
            logits = model(batch_inputs, batch_masks)
            loss = loss_function(logits, batch_labels)
            preds = F.softmax(logits, dim=-1)

        test_loss += loss.item()

        # Keep predictions as class indices. `batch_labels` are the same
        # indices the loss consumes, so subtracting 1 from the predictions only
        # (as was done before) scored two different encodings against each
        # other.
        preds_bin = preds.argmax(dim=-1)

        all_test.append(preds.cpu().numpy())
        all_test_bin.append(preds_bin.cpu().numpy())
        labels.append(batch_labels.cpu().numpy())

    test_loss /= len(test_dataloader)
    all_test = np.concatenate(all_test, axis=0)
    all_test_bin = np.concatenate(all_test_bin, axis=0)
    labels = np.concatenate(labels, axis=0)

    acc, pre, rec, f1, auc, kappa = calculate_metrics(labels, all_test, all_test_bin)

    print('Test loss: %1.4f'%(test_loss))
    print('Test acc: %1.4f'%(acc))
    print('Test pre: %1.4f'%(pre))
    print('Test rec: %1.4f'%(rec))
    print('Test f1: %1.4f'%(f1))
    print('Test auc: %1.4f'%(auc))
    print('Test kappa: %1.4f'%(kappa))
    return acc, pre, rec, f1, auc, kappa

In [None]:
# Load the news table and drop rows that have no message text.
news = pd.read_parquet('/content/drive/MyDrive/Диссертация/Парсеры сайтов/tg_comps_raw_1,5sigma_.parquet')
# Disabled preprocessing steps, kept as found:
# news['message'] = news['title'] + ' ' + news['announce']
# news = news[news['message'] != ' '].copy()
# news.drop(columns=['link', 'title', 'announce', 'text'], inplace=True)
news = news.dropna(subset=['message'])

# Chronological split on `date`: train before 2021-01-01, validation up to
# 2021-06-01, test up to 2022-01-01. Lower bounds are inclusive so that a row
# stamped exactly on a boundary lands in the later split instead of being
# excluded from all three (both sides used to be strict).
news_train = news[news.date < '2021-01-01'].copy()
news_val = news[(news.date >= '2021-01-01') & (news.date < '2021-06-01')].copy()
news_test = news[(news.date >= '2021-06-01') & (news.date < '2022-01-01')].copy()

In [None]:
# Train and evaluate one classifier per (company, target column) pair.
# `model`, `device`, `loss_function`, `optimizer`, `scheduler` and `col` are
# deliberately module-level: train() and evaluate() read them as globals.
loggs = pd.DataFrame()
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Fixed seed so weight initialisation, shuffling and dropout repeat across runs.
SEED = 42

# The path does not depend on the company, so the table is read once.
first_dates = pd.read_csv(f'/content/drive/MyDrive/Диссертация/Парсеры сайтов/Стоимость акций/1 мин/first_dates.csv', index_col='company')

# for company in ['LSRG', 'DSKY', 'MOEX', 'MTSS']:
for company in ['NVTK']:
    print(company)

    # NOTE(review): `first_day` is not used anywhere in this cell -- confirm
    # whether a later cell still needs it.
    first_day = pd.to_datetime(first_dates.loc[company,:].values[0])

    # Keep only the news rows flagged for this company.
    comp_train = news_train[news_train[company] == True].copy()
    comp_val = news_val[news_val[company] == True].copy()
    comp_test = news_test[news_test[company] == True].copy()

    # Target columns are those whose name contains "<company>_". (An earlier
    # assignment that prepended 'date' and 'message' was overwritten right
    # away and has been removed.)
    cols = [col for col in comp_test.columns if f'{company}_' in col]

    train_texts, val_texts, test_texts = comp_train['message'].values, comp_val['message'].values, comp_test['message'].values

    # NOTE(review): `[2:]` skips the first two matching columns. That offset
    # matches the removed definition that started with 'date' and 'message';
    # confirm the two skipped targets are meant to be left out.
    for col in cols[2:]:
        print(col)
        train_target, val_target, test_target = comp_train[col].values, comp_val[col].values, comp_test[col].values

        batch_size = 32
        train_dataset, val_dataset, test_dataset = NewsDataset(train_texts, train_target), NewsDataset(val_texts, val_target), NewsDataset(test_texts, test_target)

        train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
        val_dataloader = DataLoader(val_dataset, batch_size=batch_size)
        test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

        torch.manual_seed(SEED)
        model = BertClassifier(freeze=False).to(device)

        epochs = 8
        # Total number of optimizer steps; the linear schedule decays over it.
        steps = len(train_dataloader) * epochs
        learning_rate = 5e-5
        epsilon = 1e-8

        loss_function = nn.CrossEntropyLoss()
        optimizer = AdamW(model.parameters(), lr=learning_rate, eps=epsilon)
        scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=steps)

        train(epochs, train_dataloader, val_dataloader)
        acc, pre, rec, f1, auc, kappa = evaluate(test_dataloader)

        # Row order in the log is acc, pre, rec, auc, f1, kappa -- note that
        # auc comes before f1, unlike the order evaluate() returns.
        loggs[f'{col}'] = [acc, pre, rec, auc, f1, kappa]
        # `loggs` starts empty in this cell and the file is rewritten after
        # every target, so it only ever holds the columns of the current run.
        loggs.to_csv('/content/drive/MyDrive/Диссертация/tg_loggs/dist_bert_tg_loggs_neutral_magn_chmf_dsky_sngs_nvtk.csv')

        # Release the model and data before the next target is trained.
        del model
        del train_dataset
        del val_dataset
        del test_dataset
        del train_dataloader
        del val_dataloader
        del test_dataloader

        gc.collect()
        torch.cuda.empty_cache()
        gc.collect()

NVTK
NVTK_10_min_mean


Downloading pytorch_model.bin:   0%|          | 0.00/542M [00:00<?, ?B/s]

Some weights of the model checkpoint at DeepPavlov/distilrubert-base-cased-conversational were not used when initializing DistilBertModel: ['vocab_projector.bias', 'vocab_transform.bias', 'vocab_projector.weight', 'vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 104/104 [02:26<00:00,  1.41s/it]
100%|██████████| 19/19 [00:10<00:00,  1.89it/s]


Epoch: 1  |  Train Loss: 0.69544  |  Test Loss: 0.69522
Val acc: 0.5329
Val pre: 0.5329
Val rec: 1.0000
Val f1: 0.6953
Val auc: 0.4701
Val kappa: 0.0000


100%|██████████| 104/104 [02:31<00:00,  1.46s/it]
100%|██████████| 19/19 [00:10<00:00,  1.82it/s]


Epoch: 2  |  Train Loss: 0.69031  |  Test Loss: 0.70761
Val acc: 0.5329
Val pre: 0.5329
Val rec: 1.0000
Val f1: 0.6953
Val auc: 0.4634
Val kappa: 0.0000


100%|██████████| 104/104 [02:34<00:00,  1.48s/it]
100%|██████████| 19/19 [00:10<00:00,  1.80it/s]


Epoch: 3  |  Train Loss: 0.68235  |  Test Loss: 0.71836
Val acc: 0.4836
Val pre: 0.5153
Val rec: 0.5185
Val f1: 0.5169
Val auc: 0.4753
Val kappa: -0.0378


100%|██████████| 104/104 [02:35<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.79it/s]


Epoch: 4  |  Train Loss: 0.62295  |  Test Loss: 0.80239
Val acc: 0.4967
Val pre: 0.5259
Val rec: 0.5648
Val f1: 0.5446
Val auc: 0.4700
Val kappa: -0.0163


100%|██████████| 104/104 [02:35<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.79it/s]


Epoch: 5  |  Train Loss: 0.42090  |  Test Loss: 1.08514
Val acc: 0.4934
Val pre: 0.5241
Val rec: 0.5370
Val f1: 0.5305
Val auc: 0.4652
Val kappa: -0.0193


100%|██████████| 104/104 [02:35<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.79it/s]


Epoch: 6  |  Train Loss: 0.25783  |  Test Loss: 1.53835
Val acc: 0.4622
Val pre: 0.4960
Val rec: 0.5679
Val f1: 0.5295
Val auc: 0.4525
Val kappa: -0.0915


100%|██████████| 104/104 [02:35<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.79it/s]


Epoch: 7  |  Train Loss: 0.17324  |  Test Loss: 1.84238
Val acc: 0.4836
Val pre: 0.5126
Val rec: 0.6265
Val f1: 0.5639
Val auc: 0.4585
Val kappa: -0.0539


100%|██████████| 104/104 [02:35<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.79it/s]


Epoch: 8  |  Train Loss: 0.11713  |  Test Loss: 1.98200
Val acc: 0.4556
Val pre: 0.4904
Val rec: 0.5525
Val f1: 0.5196
Val auc: 0.4583
Val kappa: -0.1034


100%|██████████| 31/31 [00:16<00:00,  1.85it/s]


Test loss: 0.6944
Test acc: 0.5289
Test pre: 0.5289
Test rec: 1.0000
Test f1: 0.6918
Test auc: 0.4883
Test kappa: 0.0000
NVTK_15_min_mean


Some weights of the model checkpoint at DeepPavlov/distilrubert-base-cased-conversational were not used when initializing DistilBertModel: ['vocab_projector.bias', 'vocab_transform.bias', 'vocab_projector.weight', 'vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 104/104 [02:35<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.80it/s]


Epoch: 1  |  Train Loss: 0.69488  |  Test Loss: 0.69504
Val acc: 0.5280
Val pre: 0.5280
Val rec: 1.0000
Val f1: 0.6911
Val auc: 0.4451
Val kappa: 0.0000


100%|██████████| 104/104 [02:34<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.80it/s]


Epoch: 2  |  Train Loss: 0.69131  |  Test Loss: 0.69889
Val acc: 0.4572
Val pre: 0.4842
Val rec: 0.4299
Val f1: 0.4554
Val auc: 0.4582
Val kappa: -0.0817


100%|██████████| 104/104 [02:35<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.80it/s]


Epoch: 3  |  Train Loss: 0.68610  |  Test Loss: 0.69929
Val acc: 0.5033
Val pre: 0.5262
Val rec: 0.5950
Val f1: 0.5585
Val auc: 0.4797
Val kappa: -0.0043


100%|██████████| 104/104 [02:35<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.80it/s]


Epoch: 4  |  Train Loss: 0.65944  |  Test Loss: 0.81776
Val acc: 0.4424
Val pre: 0.4758
Val rec: 0.5514
Val f1: 0.5108
Val auc: 0.4290
Val kappa: -0.1293


100%|██████████| 104/104 [02:35<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.79it/s]


Epoch: 5  |  Train Loss: 0.52463  |  Test Loss: 0.98072
Val acc: 0.4490
Val pre: 0.4733
Val rec: 0.3863
Val f1: 0.4254
Val auc: 0.4377
Val kappa: -0.0935


100%|██████████| 104/104 [02:35<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.79it/s]


Epoch: 6  |  Train Loss: 0.33296  |  Test Loss: 1.26313
Val acc: 0.4457
Val pre: 0.4735
Val rec: 0.4455
Val f1: 0.4591
Val auc: 0.4318
Val kappa: -0.1081


100%|██████████| 104/104 [02:35<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.79it/s]


Epoch: 7  |  Train Loss: 0.21493  |  Test Loss: 1.67678
Val acc: 0.4457
Val pre: 0.4755
Val rec: 0.4829
Val f1: 0.4791
Val auc: 0.4395
Val kappa: -0.1131


100%|██████████| 104/104 [02:35<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.79it/s]


Epoch: 8  |  Train Loss: 0.15509  |  Test Loss: 1.93452
Val acc: 0.4539
Val pre: 0.4830
Val rec: 0.4860
Val f1: 0.4845
Val auc: 0.4423
Val kappa: -0.0959


100%|██████████| 31/31 [00:16<00:00,  1.86it/s]


Test loss: 0.6930
Test acc: 0.5258
Test pre: 0.5258
Test rec: 1.0000
Test f1: 0.6892
Test auc: 0.4818
Test kappa: 0.0000
NVTK_30_min_mean


Some weights of the model checkpoint at DeepPavlov/distilrubert-base-cased-conversational were not used when initializing DistilBertModel: ['vocab_projector.bias', 'vocab_transform.bias', 'vocab_projector.weight', 'vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 104/104 [02:35<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.81it/s]


Epoch: 1  |  Train Loss: 0.69363  |  Test Loss: 0.69890
Val acc: 0.4951
Val pre: 0.4951
Val rec: 1.0000
Val f1: 0.6623
Val auc: 0.4731
Val kappa: 0.0000


100%|██████████| 104/104 [02:34<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.81it/s]


Epoch: 2  |  Train Loss: 0.68970  |  Test Loss: 0.71258
Val acc: 0.4688
Val pre: 0.4688
Val rec: 0.5482
Val f1: 0.5054
Val auc: 0.4813
Val kappa: -0.0608


100%|██████████| 104/104 [02:34<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.81it/s]


Epoch: 3  |  Train Loss: 0.68460  |  Test Loss: 0.69602
Val acc: 0.4605
Val pre: 0.4702
Val rec: 0.7076
Val f1: 0.5650
Val auc: 0.4772
Val kappa: -0.0738


100%|██████████| 104/104 [02:34<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.80it/s]


Epoch: 4  |  Train Loss: 0.65777  |  Test Loss: 0.78899
Val acc: 0.4539
Val pre: 0.4514
Val rec: 0.4784
Val f1: 0.4645
Val auc: 0.4559
Val kappa: -0.0916


100%|██████████| 104/104 [02:34<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.80it/s]


Epoch: 5  |  Train Loss: 0.49742  |  Test Loss: 1.07071
Val acc: 0.4622
Val pre: 0.4581
Val rec: 0.4718
Val f1: 0.4648
Val auc: 0.4502
Val kappa: -0.0754


100%|██████████| 104/104 [02:34<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.80it/s]


Epoch: 6  |  Train Loss: 0.33423  |  Test Loss: 1.43864
Val acc: 0.4474
Val pre: 0.4426
Val rec: 0.4485
Val f1: 0.4455
Val auc: 0.4313
Val kappa: -0.1052


100%|██████████| 104/104 [02:34<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.80it/s]


Epoch: 7  |  Train Loss: 0.22371  |  Test Loss: 1.59398
Val acc: 0.4688
Val pre: 0.4737
Val rec: 0.6578
Val f1: 0.5508
Val auc: 0.4476
Val kappa: -0.0586


100%|██████████| 104/104 [02:34<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.80it/s]


Epoch: 8  |  Train Loss: 0.17576  |  Test Loss: 1.85098
Val acc: 0.4688
Val pre: 0.4711
Val rec: 0.5947
Val f1: 0.5257
Val auc: 0.4485
Val kappa: -0.0599


100%|██████████| 31/31 [00:16<00:00,  1.87it/s]


Test loss: 0.6940
Test acc: 0.5268
Test pre: 0.5268
Test rec: 1.0000
Test f1: 0.6901
Test auc: 0.4727
Test kappa: 0.0000
NVTK_1_hour_mean


Some weights of the model checkpoint at DeepPavlov/distilrubert-base-cased-conversational were not used when initializing DistilBertModel: ['vocab_projector.bias', 'vocab_transform.bias', 'vocab_projector.weight', 'vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 104/104 [02:35<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.80it/s]


Epoch: 1  |  Train Loss: 0.69588  |  Test Loss: 0.69795
Val acc: 0.4309
Val pre: 0.3851
Val rec: 0.4559
Val f1: 0.4175
Val auc: 0.4132
Val kappa: -0.1311


100%|██████████| 104/104 [02:34<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.80it/s]


Epoch: 2  |  Train Loss: 0.69036  |  Test Loss: 0.71161
Val acc: 0.4293
Val pre: 0.3762
Val rec: 0.4191
Val f1: 0.3965
Val auc: 0.4181
Val kappa: -0.1418


100%|██████████| 104/104 [02:35<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.79it/s]


Epoch: 3  |  Train Loss: 0.68620  |  Test Loss: 0.73772
Val acc: 0.4474
Val pre: 0.4474
Val rec: 1.0000
Val f1: 0.6182
Val auc: 0.4590
Val kappa: 0.0000


100%|██████████| 104/104 [02:35<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.79it/s]


Epoch: 4  |  Train Loss: 0.64912  |  Test Loss: 0.77903
Val acc: 0.4326
Val pre: 0.3983
Val rec: 0.5257
Val f1: 0.4532
Val auc: 0.4452
Val kappa: -0.1137


100%|██████████| 104/104 [02:35<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.79it/s]


Epoch: 5  |  Train Loss: 0.43656  |  Test Loss: 1.13962
Val acc: 0.4309
Val pre: 0.4223
Val rec: 0.7390
Val f1: 0.5374
Val auc: 0.4615
Val kappa: -0.0742


100%|██████████| 104/104 [02:35<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.79it/s]


Epoch: 6  |  Train Loss: 0.27048  |  Test Loss: 1.48745
Val acc: 0.4474
Val pre: 0.4252
Val rec: 0.6691
Val f1: 0.5200
Val auc: 0.4900
Val kappa: -0.0598


100%|██████████| 104/104 [02:35<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.80it/s]


Epoch: 7  |  Train Loss: 0.17800  |  Test Loss: 1.76662
Val acc: 0.4622
Val pre: 0.4282
Val rec: 0.6029
Val f1: 0.5008
Val auc: 0.4739
Val kappa: -0.0470


100%|██████████| 104/104 [02:35<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.79it/s]


Epoch: 8  |  Train Loss: 0.11756  |  Test Loss: 2.11868
Val acc: 0.4441
Val pre: 0.4158
Val rec: 0.5993
Val f1: 0.4910
Val auc: 0.4664
Val kappa: -0.0790


100%|██████████| 31/31 [00:16<00:00,  1.85it/s]


Test loss: 1.5392
Test acc: 0.5165
Test pre: 0.5186
Test rec: 0.6232
Test f1: 0.5661
Test auc: 0.5173
Test kappa: 0.0304
NVTK_1_day_mean


Some weights of the model checkpoint at DeepPavlov/distilrubert-base-cased-conversational were not used when initializing DistilBertModel: ['vocab_projector.bias', 'vocab_transform.bias', 'vocab_projector.weight', 'vocab_layer_norm.bias', 'vocab_transform.weight', 'vocab_layer_norm.weight']
- This IS expected if you are initializing DistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|██████████| 104/104 [02:34<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.81it/s]


Epoch: 1  |  Train Loss: 0.69698  |  Test Loss: 0.69049
Val acc: 0.5806
Val pre: 0.5607
Val rec: 0.4718
Val f1: 0.5124
Val auc: 0.5787
Val kappa: 0.1492


100%|██████████| 104/104 [02:34<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.82it/s]


Epoch: 2  |  Train Loss: 0.69203  |  Test Loss: 0.70158
Val acc: 0.4688
Val pre: 0.4668
Val rec: 0.9648
Val f1: 0.6292
Val auc: 0.4947
Val kappa: -0.0012


100%|██████████| 104/104 [02:34<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.82it/s]


Epoch: 3  |  Train Loss: 0.69101  |  Test Loss: 0.68985
Val acc: 0.5461
Val pre: 0.5312
Val rec: 0.2394
Val f1: 0.3301
Val auc: 0.5530
Val kappa: 0.0562


100%|██████████| 104/104 [02:34<00:00,  1.48s/it]
100%|██████████| 19/19 [00:10<00:00,  1.83it/s]


Epoch: 4  |  Train Loss: 0.68515  |  Test Loss: 0.69231
Val acc: 0.5329
Val pre: 0.5000
Val rec: 0.1479
Val f1: 0.2283
Val auc: 0.5042
Val kappa: 0.0191


100%|██████████| 104/104 [02:34<00:00,  1.48s/it]
100%|██████████| 19/19 [00:10<00:00,  1.82it/s]


Epoch: 5  |  Train Loss: 0.67753  |  Test Loss: 0.70437
Val acc: 0.5329
Val pre: 0.5000
Val rec: 0.1690
Val f1: 0.2526
Val auc: 0.4819
Val kappa: 0.0218


100%|██████████| 104/104 [02:34<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.81it/s]


Epoch: 6  |  Train Loss: 0.66855  |  Test Loss: 0.70075
Val acc: 0.4951
Val pre: 0.4700
Val rec: 0.6338
Val f1: 0.5397
Val auc: 0.5086
Val kappa: 0.0071


100%|██████████| 104/104 [02:34<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.81it/s]


Epoch: 7  |  Train Loss: 0.62756  |  Test Loss: 0.74262
Val acc: 0.4934
Val pre: 0.4657
Val rec: 0.5739
Val f1: 0.5142
Val auc: 0.5022
Val kappa: -0.0032


100%|██████████| 104/104 [02:34<00:00,  1.49s/it]
100%|██████████| 19/19 [00:10<00:00,  1.81it/s]


Epoch: 8  |  Train Loss: 0.51968  |  Test Loss: 0.85978
Val acc: 0.5082
Val pre: 0.4681
Val rec: 0.3873
Val f1: 0.4239
Val auc: 0.4927
Val kappa: 0.0015


100%|██████████| 31/31 [00:16<00:00,  1.87it/s]


Test loss: 0.6949
Test acc: 0.4732
Test pre: 0.5589
Test rec: 0.3040
Test f1: 0.3938
Test auc: 0.4762
Test kappa: -0.0046
