# Импорты

In [None]:
# Mount Google Drive into the Colab runtime so the project files under /content/drive are readable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Change the working directory to the project folder on the mounted drive (IPython `cd` automagic).
cd drive/MyDrive/Курсовая

/content/drive/MyDrive/Курсовая


In [None]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import re
import random
import os
import math
import pickle

import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer

import transformers
from transformers import BertForSequenceClassification, AdamW, BertConfig,BertTokenizer,get_linear_schedule_with_warmup
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, random_split
from torch.nn import CrossEntropyLoss

In [None]:
def seed_everything(seed: int):
    """Seed every random number generator used in this notebook.

    Seeds Python's `random`, NumPy and PyTorch (CPU and all CUDA devices) and
    switches cuDNN into deterministic mode.

    Args:
        seed: integer seed applied to all generators.
    """
    random.seed(seed)
    # Only affects hashing in processes started after this point (e.g. workers).
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seed every visible GPU, not just the current one.
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    # The cuDNN autotuner may select different kernels from run to run, which
    # defeats `deterministic = True`; it has to be disabled for reproducibility.
    torch.backends.cudnn.benchmark = False

seed_everything(42)

# Подготовка

In [None]:
# Load the dataset; later cells read its `tweet`, `user`, `target` and `data_type` columns.
df = pd.read_csv('sentiment140_100000.csv')

In [None]:
# Use the first CUDA GPU when available, otherwise the CPU; the bare `device` line displays the choice.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [None]:
# Tokenizer for the same `bert-base-uncased` checkpoint that every model below is loaded from.
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]



config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

In [None]:
# Dictionary with embeddings: knowledge_dct[key] is itself a mapping from user to embedding
# (it is indexed that way in the LDA experiment loop below).
# NOTE(review): pickle.load can execute arbitrary code — only load this file from a trusted source.
with open('knowledge.pickle', 'rb') as f:
  knowledge_dct = pickle.load(f)

In [None]:
# Collects per-experiment loss curves; train_validate fills
# res_dct[name] = {'train_loss': [...], 'val_loss': [...]}.
res_dct = {}

# Функции

In [None]:
def get_dataloaders(tokenizer, text_column, knowledge_dct=None, user_embedding_dim=None):
  """
  Build the train / val / test DataLoaders used to fine-tune and evaluate a model.

  Reads the module-level DataFrame `df` and splits it by its `data_type` column
  ('train', 'val', 'test'). Each batch holds (input_ids, attention_mask, labels)
  and, when `knowledge_dct` is given, a fourth float tensor with user embeddings.

  Args:
    tokenizer: tokenizer exposing `batch_encode_plus`.
    text_column: name of the `df` column holding the text to tokenize.
    knowledge_dct: optional mapping user -> embedding vector. Users missing from
      the mapping receive the mean embedding over all known users.
    user_embedding_dim: expected embedding size; when omitted it is taken from
      the embeddings themselves.

  Returns:
    Tuple (dataloader_train, dataloader_val, dataloader_test), batch size 32.
    Only the training loader is shuffled.

  Raises:
    ValueError: if `knowledge_dct` is empty or its vectors do not match
      `user_embedding_dim`.
  """
  batch_size = 32

  # Longest text measured in whitespace-separated words, plus 2 for [CLS] and [SEP].
  # NOTE: this counts words, not sub-word tokens, so long texts can still be truncated.
  max_length = int(df[text_column].apply(lambda text: len(text.split(' '))).max()) + 2

  # Never pad beyond what the tokenizer's model accepts.
  model_limit = getattr(tokenizer, 'model_max_length', None)
  if model_limit:
    max_length = min(max_length, int(model_limit))

  def _encode(texts):
    # Tokenize one split with identical settings for every split.
    return tokenizer.batch_encode_plus(
        list(texts),
        add_special_tokens = True,
        return_attention_mask = True,
        truncation = True,
        padding = 'max_length',
        max_length = max_length,
        return_tensors = 'pt'
        )

  fallback_embedding = None
  if knowledge_dct is not None:
    if len(knowledge_dct) == 0:
      raise ValueError('knowledge_dct is empty: cannot build user embeddings')

    known = np.stack([np.asarray(vec, dtype=np.float64) for vec in knowledge_dct.values()])
    if user_embedding_dim is not None and known.shape[-1] != user_embedding_dim:
      raise ValueError(
          f'user_embedding_dim={user_embedding_dim} does not match '
          f'embedding size {known.shape[-1]} found in knowledge_dct'
      )

    # Mean embedding over all users: used for users absent from the dictionary
    # (can happen for val and test rows).
    fallback_embedding = known.mean(axis=0)

  def _user_features(users):
    # Look up each user's embedding, falling back to the mean embedding.
    rows = [np.asarray(knowledge_dct.get(user, fallback_embedding), dtype=np.float32)
            for user in users]
    # Convert through one ndarray: building a tensor from a list of arrays is slow.
    return torch.tensor(np.asarray(rows, dtype=np.float32))

  loaders = {}
  for split in ('train', 'val', 'test'):
    split_df = df[df['data_type'] == split]
    encoded = _encode(split_df[text_column].values)

    tensors = [
        encoded['input_ids'],
        encoded['attention_mask'],
        torch.tensor(split_df.target.values),
    ]
    if knowledge_dct is not None:
      tensors.append(_user_features(split_df.user.values))

    dataset = TensorDataset(*tensors)

    if split == 'train':
      loaders[split] = DataLoader(dataset, sampler=RandomSampler(dataset), batch_size=batch_size)
    else:
      # Evaluation needs no shuffling; a fixed order also keeps evaluation from
      # consuming global RNG state between training epochs.
      loaders[split] = DataLoader(dataset, shuffle=False, batch_size=batch_size)

  return loaders['train'], loaders['val'], loaders['test']

In [None]:
def score_func(prediction, true_labels):
  """
  Compute the F1 score between predictions and the true labels.

  `prediction` holds per-class scores; the predicted class of each row is the
  argmax along axis 1. Both arrays are flattened before scoring.
  """
  predicted_classes = np.argmax(prediction, axis=1).flatten()
  reference_classes = true_labels.flatten()
  return f1_score(reference_classes, predicted_classes)

In [None]:
def evaluate(dataloader_val, with_additional_embeddings=False, model=None):
  """
  Compute the average loss and collect predictions over a val or test DataLoader.

  Args:
    dataloader_val: DataLoader yielding (input_ids, attention_mask, labels) and,
      when `with_additional_embeddings` is True, a fourth tensor of user features.
    with_additional_embeddings: if True the model is called with
      `numerical_feats` and its output is read as a dict ('loss', 'logits');
      otherwise the output is indexed positionally (loss, logits).
    model: model to evaluate. When omitted, the module-level `model` is used,
      which is how existing callers invoke this function.

  Returns:
    Tuple (loss_val_avg, predictions, true_vals): mean per-batch loss, logits
    concatenated along axis 0, and the matching labels as NumPy arrays.

  Raises:
    NameError: if no `model` is passed and no global `model` exists.
    ValueError: if the dataloader yields no batches.
  """
  if model is None:
    try:
      model = globals()['model']
    except KeyError:
      raise NameError("evaluate() needs a `model` argument or a global `model`") from None

  model.eval()

  loss_val_total = 0
  n_batches = 0
  predictions, true_vals = [], []

  # Gradients are never needed during evaluation.
  with torch.no_grad():
    for batch in tqdm(dataloader_val):
      batch = tuple(t.to(device) for t in batch)

      # with additional user embeddings
      if with_additional_embeddings:
        outputs = model(
            batch[0],
            token_type_ids=None,
            attention_mask=batch[1],
            labels=batch[2],
            numerical_feats=batch[3]
            )
        loss, logits = outputs['loss'], outputs['logits']

      # without additional embeddings
      else:
        outputs = model(
            input_ids=batch[0],
            attention_mask=batch[1],
            labels=batch[2]
            )
        loss, logits = outputs[0], outputs[1]

      loss_val_total += loss.item()
      n_batches += 1

      predictions.append(logits.detach().cpu().numpy())
      true_vals.append(batch[2].cpu().numpy())

  if n_batches == 0:
    raise ValueError('evaluate() received an empty dataloader')

  loss_val_avg = loss_val_total/n_batches

  predictions = np.concatenate(predictions, axis=0)
  true_vals = np.concatenate(true_vals, axis=0)

  return loss_val_avg, predictions, true_vals

In [None]:
def train_validate(model,
                   dataloader_train, dataloader_val, dataloader_test,
                   res_dct, name,
                   num_epochs=5,
                   with_additional_embeddings=False):
  """
  Fine-tune `model` and report loss / F1 on the val and test sets after every epoch.

  Args:
    model: model to train. With `with_additional_embeddings=True` it must accept
      `numerical_feats` and return a dict with a 'loss' entry; otherwise the
      loss is read from position 0 of the output.
    dataloader_train, dataloader_val, dataloader_test: DataLoaders yielding
      (input_ids, attention_mask, labels[, user_features]).
    res_dct: results dictionary, mutated in place:
      res_dct[name] = {'train_loss': [...], 'val_loss': [...]} with one entry per epoch.
    name: key under which the curves are stored (an existing entry is overwritten).
    num_epochs: number of training epochs.
    with_additional_embeddings: whether batches carry user features.

  Note:
    `evaluate()` is called without a model argument and therefore scores the
    module-level `model`; a warning is issued if that is not the object being trained.
  """
  import warnings

  if globals().get('model') is not model:
    warnings.warn(
        "train_validate(): the `model` argument is not the global `model`; "
        "evaluate() reads the global one, so val/test metrics may describe a different model."
    )

  # NOTE(review): `AdamW` here is the transformers implementation imported at the top of
  # the file; it is deprecated upstream in favour of torch.optim.AdamW. Kept as is so the
  # optimisation stays numerically unchanged.
  optimizer = AdamW(model.parameters(), lr=1e-5, eps=1e-8)

  # Linear decay of the learning rate to zero over all training steps, no warm-up.
  scheduler = get_linear_schedule_with_warmup(
              optimizer,
              num_warmup_steps = 0,
              num_training_steps = len(dataloader_train)*num_epochs
  )

  res_dct[name] = {}
  res_dct[name]['train_loss'] = []
  res_dct[name]['val_loss'] = []

  for epoch in tqdm(range(1, num_epochs+1)):
    model.train()

    loss_train_total=0

    progress_bar = tqdm(dataloader_train, desc = "Epoch: {:1d}".format(epoch), leave = False, disable = False)

    for batch in progress_bar:
        model.zero_grad()

        batch = tuple(t.to(device) for t in batch)

        # with additional user embeddings
        if with_additional_embeddings:
          outputs = model(batch[0],
                          token_type_ids=None,
                          attention_mask=batch[1],
                          labels=batch[2],
                          numerical_feats=batch[3]
                          )
          loss = outputs['loss']

        # without additional embeddings
        else:
          outputs = model(input_ids=batch[0],
                          attention_mask=batch[1],
                          labels=batch[2])
          loss = outputs[0]

        # Read the scalar once; each .item() call forces a device synchronisation.
        batch_loss = loss.item()
        loss_train_total += batch_loss
        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()
        scheduler.step()

        # Show the batch loss itself: len(batch) is the number of tensors in the
        # batch (3 or 4), not a sample count, so dividing by it was meaningless.
        progress_bar.set_postfix({'training_loss':'{:.3f}'.format(batch_loss)})

    tqdm.write(f'\nEpoch {epoch}')

    loss_train_avg = loss_train_total/len(dataloader_train)
    tqdm.write(f'Training Loss: {loss_train_avg}')

    val_loss, val_predictions, true_vals = evaluate(dataloader_val, with_additional_embeddings)
    val_f1_score = score_func(val_predictions, true_vals)
    tqdm.write(f'Val Loss:{val_loss}\n Val F1: {val_f1_score}')

    _, test_predictions, true_test = evaluate(dataloader_test, with_additional_embeddings)
    test_f1_score = score_func(test_predictions, true_test)
    tqdm.write(f'Test F1: {test_f1_score}')

    res_dct[name]['train_loss'].append(loss_train_avg)
    res_dct[name]['val_loss'].append(val_loss)

# Модели

In [None]:
class MLP(nn.Module):
    """
    Multi-layer perceptron with ReLU activations.
    Code taken from https://github.com/georgian-io/Multimodal-Toolkit/blob/master/multimodal_transformers/model/layer_utils.py

    Hidden layers are Linear -> (optional BatchNorm1d) -> ReLU -> Dropout; the
    final Linear layer has no activation, normalisation or dropout.
    """
    def __init__(
        self,
        input_dim,
        output_dim,
        num_hidden_lyr=2,
        dropout_prob=0.5,
        return_layer_outs=False,
        hidden_channels=None,
        bn=False,
    ):
        """
        :param input_dim: size of the input features
        :param output_dim: size of the output
        :param num_hidden_lyr: number of hidden layers
        :param dropout_prob: dropout probability applied after every hidden layer
        :param return_layer_outs: if True, forward also returns every layer's input/output
        :param hidden_channels: hidden layer widths; defaults to `input_dim` for each layer
        :param bn: whether to apply BatchNorm1d after each hidden Linear layer
        """
        super().__init__()
        self.out_dim = output_dim
        self.dropout = nn.Dropout(dropout_prob)
        self.return_layer_outs = return_layer_outs

        if not hidden_channels:
            hidden_channels = [input_dim] * num_hidden_lyr
        elif len(hidden_channels) != num_hidden_lyr:
            raise ValueError(
                "number of hidden layers should be the same as the lengh of hidden_channels"
            )

        self.layer_channels = [input_dim] + hidden_channels + [output_dim]
        self.act_name = 'relu'
        self.activation = nn.ReLU()

        widths = self.layer_channels
        # Build all hidden Linear layers first and initialise them afterwards, so
        # the random number generator is consumed in the same order as before.
        hidden_linears = [
            nn.Linear(fan_in, fan_out)
            for fan_in, fan_out in zip(widths[:-2], widths[1:-1])
        ]
        for linear in hidden_linears:
            self.weight_init(linear)
        self.layers = nn.ModuleList(hidden_linears)

        # Output layer: Xavier gain for a linear (identity) activation.
        head = nn.Linear(widths[-2], widths[-1])
        self.weight_init(head, activation="linear")
        self.layers.append(head)

        # `self.bn` is either the falsy flag or a ModuleList with one BatchNorm1d per hidden layer.
        self.bn = bn
        if self.bn:
            self.bn = nn.ModuleList(
                [torch.nn.BatchNorm1d(width) for width in widths[1:-1]]
            )

    def weight_init(self, m, activation=None):
        """Xavier-uniform initialise `m.weight` with the gain of `activation` (default: ReLU) and return `m`."""
        gain_name = self.act_name if activation is None else activation
        torch.nn.init.xavier_uniform_(m.weight, gain=nn.init.calculate_gain(gain_name))
        return m

    def forward(self, x):
        """
        :param x: the input features
        :return: output of the MLP; when `return_layer_outs` is set, a tuple of
                that output and the list of inputs and outputs at every layer
        """
        layer_inputs = [x]
        output_layer = self.layers[-1]

        for idx, layer in enumerate(self.layers):
            current = layer_inputs[-1]

            if layer is output_layer:
                layer_inputs.append(layer(current))
                continue

            hidden = layer(current)
            if self.bn:
                hidden = self.bn[idx](hidden)
            layer_inputs.append(self.dropout(self.activation(hidden)))

        result = layer_inputs[-1]
        if self.return_layer_outs:
            return result, layer_inputs
        return result

In [None]:
class BertConcatFeatures(BertForSequenceClassification):
  """
  BERT that concatenates numerical features to the pooled text representation.

  The pooled BERT output (after dropout) is concatenated with batch-normalised
  numerical features and classified by an MLP whose hidden widths shrink by a
  factor of 4 per layer.

  The config must provide `text_feat_dim`, `numerical_feat_dim` and `num_labels`.
  """
  def __init__(self, config):
    super().__init__(config)
    self.num_labels = config.num_labels
    combined_feat_dim = config.text_feat_dim + config.numerical_feat_dim
    self.num_bn = nn.BatchNorm1d(config.numerical_feat_dim)

    # Hidden widths: keep dividing by 4 while the width stays above num_labels.
    dims = []
    dim = combined_feat_dim

    while True:
        dim = dim // 4
        if dim <= self.num_labels:
          break

        dims.append(int(dim))

    print('MLP layer sizes:')
    print(' Input:', combined_feat_dim)
    print(' Hidden:', dims)
    print(' Output:', self.num_labels)
    print('')

    self.mlp = MLP(combined_feat_dim,
                   self.num_labels,
                   num_hidden_lyr=len(dims),
                   dropout_prob=0.1,
                   hidden_channels=dims,
                   bn=True)

  def forward(
      self,
      input_ids=None,
      attention_mask=None,
      token_type_ids=None,
      position_ids=None,
      head_mask=None,
      inputs_embeds=None,
      labels=None,
      class_weights=None,
      output_attentions=None,
      output_hidden_states=None,
      numerical_feats=None
  ):
    """
    Run BERT, fuse the pooled output with `numerical_feats` and classify.

    Args:
      input_ids, attention_mask, token_type_ids, position_ids, head_mask,
      inputs_embeds, output_attentions, output_hidden_states: forwarded to the
        underlying BERT encoder.
      labels: optional class indices; when given, a cross-entropy loss is computed.
      class_weights: optional per-class weights for the cross-entropy loss.
      numerical_feats: float tensor of numerical features, one row per example
        (required).

    Returns:
      dict with 'loss' (None when `labels` is None), 'logits' and
      'classifier_layer_outputs'.

    Raises:
      ValueError: if `numerical_feats` is not provided.
    """
    if numerical_feats is None:
      raise ValueError('BertConcatFeatures.forward() requires `numerical_feats`')

    outputs = self.bert(
        input_ids,
        attention_mask=attention_mask,
        token_type_ids=token_type_ids,
        # Previously accepted but silently dropped; None keeps BERT's default positions.
        position_ids=position_ids,
        head_mask=head_mask,
        inputs_embeds=inputs_embeds,
        output_attentions=output_attentions,
        output_hidden_states=output_hidden_states
    )

    # Index 1 of the BERT output is the pooled representation.
    cls = outputs[1]

    cls = self.dropout(cls)

    numerical_feats = self.num_bn(numerical_feats)

    combined_feats = torch.cat((cls, numerical_feats), dim=1)

    logits = self.mlp(combined_feats)

    # The MLP returns a tuple only when it was built with return_layer_outs=True.
    if isinstance(logits, tuple):
      logits, classifier_layer_outputs = logits[0], logits[1]
    else:
      classifier_layer_outputs = [combined_feats, logits]

    if labels is not None:

      loss_fct = CrossEntropyLoss(weight=class_weights)
      labels = labels.long()
      loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
    else:
      loss = None

    results = {'loss': loss,
               'logits': logits,
               'classifier_layer_outputs': classifier_layer_outputs}

    return results

# BERT без каких-либо модификаций

In [None]:
# Baseline: tokenize the raw `tweet` column only, without user embeddings.
dataloader_train, dataloader_val, dataloader_test = get_dataloaders(tokenizer, 'tweet')

In [None]:
# Load bert-base-uncased with a 2-label classification head and move it to `device`.
# `evaluate` reads this global `model`; the trailing ';' suppresses the cell's repr output.
model = BertForSequenceClassification.from_pretrained('bert-base-uncased',
                                     num_labels = 2,
                                     output_attentions = False,
                                     output_hidden_states =  False)
model.to(device);

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Train with the default 5 epochs; loss curves are stored under res_dct['without'].
train_validate(model, dataloader_train, dataloader_val, dataloader_test, res_dct, 'without')



  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 1
Training Loss: 0.4693759312191285


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.43704127138241744
 Val F1: 0.844451252201884


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8436539643515673


Epoch: 2:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 2
Training Loss: 0.39560892993945174


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.44227836513891816
 Val F1: 0.83871477066393


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8446659546695148


Epoch: 3:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 3
Training Loss: 0.3419263069958343


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.46641277192303765
 Val F1: 0.8434055195059026


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8439074307304786


Epoch: 4:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 4
Training Loss: 0.2972023509343602


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5036199235667785
 Val F1: 0.8372766779333656


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.836378334680679


Epoch: 5:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 5
Training Loss: 0.26540054759751286


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5342520572221241
 Val F1: 0.8362944162436549


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8387096774193549


# BERT, где в текст добавлена информация

## 1. {tweet} [SEP] The author of the text is {username}

In [None]:
# New text column: the tweet followed by the literal string "[SEP] The author of the text is <user>".
df['tweet_author_is'] = df.apply(lambda x: f'{x.tweet} [SEP] The author of the text is {x.user}', axis=1)

In [None]:
# Rebuild the dataloaders from the `tweet_author_is` column (overwrites the previous loaders).
dataloader_train, dataloader_val, dataloader_test = get_dataloaders(tokenizer, 'tweet_author_is')

In [None]:
# Reload bert-base-uncased so this experiment does not continue from the previous run's weights.
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels = 2,
    output_attentions = False,
    output_hidden_states =  False
    )
model.to(device);

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Train with the default 5 epochs; loss curves are stored under res_dct['tweet_author_is'].
train_validate(model, dataloader_train, dataloader_val, dataloader_test, res_dct, 'tweet_author_is')



  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 1
Training Loss: 0.45909176199013646


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.3939999665778417
 Val F1: 0.8586387434554975


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8594641724793913


Epoch: 2:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 2
Training Loss: 0.3499398448284719


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.37219363799652994
 Val F1: 0.8696063737477322


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8720360417325326


Epoch: 3:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 3
Training Loss: 0.29483130477074637


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.3730594453234703
 Val F1: 0.8713839249939899


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8723318889423848


Epoch: 4:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 4
Training Loss: 0.24988929909731786


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.4316394011525867
 Val F1: 0.8707119741100324


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8704995134609148


Epoch: 5:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 5
Training Loss: 0.22098710621502746


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.4422072132404607
 Val F1: 0.868972157586456


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8719681428743816


## 2. {tweet} [SEP] {username}

In [None]:
# New text column: the tweet followed by the literal string "[SEP] <user>" (no connecting phrase).
df['tweet_author'] = df.apply(lambda x: f'{x.tweet} [SEP] {x.user}', axis=1)

In [None]:
# Rebuild the dataloaders from the `tweet_author` column (overwrites the previous loaders).
dataloader_train, dataloader_val, dataloader_test = get_dataloaders(tokenizer, 'tweet_author')

In [None]:
# Reload bert-base-uncased so this experiment does not continue from the previous run's weights.
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels = 2,
    output_attentions = False,
    output_hidden_states =  False
    )
model.to(device);

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Train with the default 5 epochs; loss curves are stored under res_dct['tweet_author'].
train_validate(model, dataloader_train, dataloader_val, dataloader_test, res_dct, 'tweet_author')



  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 1
Training Loss: 0.4567935630419147


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.4119603237471519
 Val F1: 0.8610942249240122


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8613681177005916


Epoch: 2:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 2
Training Loss: 0.3481828663481059


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.3751022914090218
 Val F1: 0.8704010084298431


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8739701186580991


Epoch: 3:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 3
Training Loss: 0.29136860559124045


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.38833011284422797
 Val F1: 0.8627483171892957


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8677148501810997


Epoch: 4:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 4
Training Loss: 0.24600395511175593


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.4193254336237143
 Val F1: 0.8678523008214372


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8705188020895641


Epoch: 5:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 5
Training Loss: 0.21698504055211368


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.4552193355794327
 Val F1: 0.8673191079736885


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8693099565706933


## 3. {tweet} [SEP] The sentiment is {positive/negative}

In [None]:
# New text column: the tweet followed by "[SEP] The sentiment is positive|negative".
# NOTE(review): the word is chosen by `x.user == 1`, but other cells use `user` as the author
# identifier (the `{x.user}` suffixes and the knowledge_dct lookups). The comparison is
# presumably never true, so every row would get "negative" — confirm which column was intended
# (e.g. a per-user sentiment feature). Using `target` here would leak the label into the input.
df['tweet_sentiment_is'] = df.apply(lambda x: f"{x.tweet} [SEP] The sentiment is {'positive' if x.user == 1 else 'negative'}", axis=1)

In [None]:
# Rebuild the dataloaders from the `tweet_sentiment_is` column (overwrites the previous loaders).
dataloader_train, dataloader_val, dataloader_test = get_dataloaders(tokenizer, 'tweet_sentiment_is')

In [None]:
# Reload bert-base-uncased so this experiment does not continue from the previous run's weights.
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels = 2,
    output_attentions = False,
    output_hidden_states =  False
    )
model.to(device);

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Train with the default 5 epochs; loss curves are stored under res_dct['tweet_sentiment_is'].
train_validate(model, dataloader_train, dataloader_val, dataloader_test, res_dct, 'tweet_sentiment_is')

  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 1
Training Loss: 0.4580220839742458


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.4355173653517014
 Val F1: 0.8380376878428878


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8396387794884036


Epoch: 2:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 2
Training Loss: 0.3912133883489886


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.4396180639353891
 Val F1: 0.844172164624568


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8446092548029508


Epoch: 3:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 3
Training Loss: 0.34088990137774644


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.4697905133645504
 Val F1: 0.8400597907324363


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8416461819046864


Epoch: 4:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 4
Training Loss: 0.29341573805588445


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5160697140277196
 Val F1: 0.8327847796419684


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8358317507778092


Epoch: 5:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 5
Training Loss: 0.2587067053231682


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5369047603259484
 Val F1: 0.8342092689918776


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8357575029049966


## 4. {tweet} [SEP] {positive/negative}

In [None]:
# New text column: the tweet followed by "[SEP] positive|negative" (no connecting phrase).
# NOTE(review): the word is chosen by `x.user == 1`, but other cells use `user` as the author
# identifier. The comparison is presumably never true, so every row would get "negative" —
# confirm which column was intended. Using `target` here would leak the label into the input.
df['tweet_sentiment'] = df.apply(lambda x: f"{x.tweet} [SEP] {'positive' if x.user == 1 else 'negative'}", axis=1)

In [None]:
# Rebuild the dataloaders from the `tweet_sentiment` column (overwrites the previous loaders).
dataloader_train, dataloader_val, dataloader_test = get_dataloaders(tokenizer, 'tweet_sentiment')

In [None]:
# Reload bert-base-uncased so this experiment does not continue from the previous run's weights.
model = BertForSequenceClassification.from_pretrained(
    'bert-base-uncased',
    num_labels = 2,
    output_attentions = False,
    output_hidden_states =  False
    )
model.to(device);

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Train with the default 5 epochs; loss curves are stored under res_dct['tweet_sentiment'].
train_validate(model, dataloader_train, dataloader_val, dataloader_test, res_dct, 'tweet_sentiment')



  0%|          | 0/5 [00:00<?, ?it/s]

Epoch: 1:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 1
Training Loss: 0.4735045466979126


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.44341929701085275
 Val F1: 0.8401896314421166


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8426940464332235


Epoch: 2:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 2
Training Loss: 0.3979295841473795


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.4390966717440348
 Val F1: 0.8396800000000001


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8412010276172126


Epoch: 3:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 3
Training Loss: 0.3468383516332937


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.47587409051947105
 Val F1: 0.8394137326748446


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8400368191459557


Epoch: 4:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 4
Training Loss: 0.300859318425258


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5113386575084848
 Val F1: 0.8367474589523065


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8396868484204728


Epoch: 5:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 5
Training Loss: 0.2676806150190703


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5290178026621922
 Val F1: 0.8330536242308001


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8336347923951928


# BERT с использованием эмбеддингов пользователей

## Эмбеддинги, полученные с помощью LDA

In [None]:
# knowledge_dct entries to evaluate; the number after the last '_' is parsed in the loop
# below and used as the user-embedding dimension.
keys = ['post_lda_2', 'post_lda_5', 'post_lda_10', 'post_lda_20',
        'user_lda_2', 'user_lda_5', 'user_lda_10', 'user_lda_20']

In [None]:
# Run one BertConcatFeatures experiment per embedding set listed in `keys`.
for key in keys:
  print(key)

  # pick the matching dictionary; the embedding size is encoded at the end of the key
  tmp_dct = knowledge_dct[key]
  user_embedding_dim = int(key.split('_')[-1])

  # build the dataloaders, adding the user embeddings to every batch
  dataloader_train, dataloader_val, dataloader_test = get_dataloaders(tokenizer, 'tweet',
                                                                      knowledge_dct=tmp_dct,
                                                                      user_embedding_dim=user_embedding_dim)

  # prepare the model
  config = BertConfig.from_pretrained(
      'bert-base-uncased',
      num_labels=2
      )

  # extra config fields read by BertConcatFeatures.__init__
  config.numerical_feat_dim = user_embedding_dim
  config.text_feat_dim = 768 # because the BERT hidden size is 768

  # must stay bound to the global name `model`: evaluate() reads it
  model = BertConcatFeatures.from_pretrained(
      'bert-base-uncased',
      config=config
      )

  model.to(device)

  # train for 10 epochs here (the earlier experiments used the default 5)
  train_validate(model, dataloader_train, dataloader_val, dataloader_test, res_dct, key,
                 num_epochs=10, with_additional_embeddings=True)

post_lda_2


Some weights of BertConcatFeatures were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'mlp.bn.0.bias', 'mlp.bn.0.num_batches_tracked', 'mlp.bn.0.running_mean', 'mlp.bn.0.running_var', 'mlp.bn.0.weight', 'mlp.bn.1.bias', 'mlp.bn.1.num_batches_tracked', 'mlp.bn.1.running_mean', 'mlp.bn.1.running_var', 'mlp.bn.1.weight', 'mlp.bn.2.bias', 'mlp.bn.2.num_batches_tracked', 'mlp.bn.2.running_mean', 'mlp.bn.2.running_var', 'mlp.bn.2.weight', 'mlp.bn.3.bias', 'mlp.bn.3.num_batches_tracked', 'mlp.bn.3.running_mean', 'mlp.bn.3.running_var', 'mlp.bn.3.weight', 'mlp.layers.0.bias', 'mlp.layers.0.weight', 'mlp.layers.1.bias', 'mlp.layers.1.weight', 'mlp.layers.2.bias', 'mlp.layers.2.weight', 'mlp.layers.3.bias', 'mlp.layers.3.weight', 'mlp.layers.4.bias', 'mlp.layers.4.weight', 'num_bn.bias', 'num_bn.num_batches_tracked', 'num_bn.running_mean', 'num_bn.running_var', 'num_bn.weight']
You should probably TRAIN this mod

MLP layer sizes:
 Input: 770
 Hidden: [192, 48, 12, 3]
 Output: 2





  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 1
Training Loss: 0.6765315854242901


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6592218312315452
 Val F1: 0.8298057407944333


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8308553564288051


Epoch: 2:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 2
Training Loss: 0.6490390829201462


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6385289969352576
 Val F1: 0.8354468148590183


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8382462965733951


Epoch: 3:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 3
Training Loss: 0.6274183879651751


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6174948796247824
 Val F1: 0.8364935822637106


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8407573200264181


Epoch: 4:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 4
Training Loss: 0.6079083536610459


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6094291466168869
 Val F1: 0.8386504475556575


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8405517879161528


Epoch: 5:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 5
Training Loss: 0.5909032060587702


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5935272207626929
 Val F1: 0.8393921334922527


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8424008686861123


Epoch: 6:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 6
Training Loss: 0.5767694898599655


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5920903526055508
 Val F1: 0.8380700594844679


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8427691513296204


Epoch: 7:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 7
Training Loss: 0.5644639005509947


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5848852464785943
 Val F1: 0.8417716743378858


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8436762354374476


Epoch: 8:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 8
Training Loss: 0.5554081115348279


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.583161164743778
 Val F1: 0.8420186915887851


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.844551583863799


Epoch: 9:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 9
Training Loss: 0.5491065905798018


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5772113549785737
 Val F1: 0.8411144578313253


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8434237995824635


Epoch: 10:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 10
Training Loss: 0.5453148617095308


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5785586677300625
 Val F1: 0.8406102051551815


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8435843054082716
post_lda_5


Some weights of BertConcatFeatures were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'mlp.bn.0.bias', 'mlp.bn.0.num_batches_tracked', 'mlp.bn.0.running_mean', 'mlp.bn.0.running_var', 'mlp.bn.0.weight', 'mlp.bn.1.bias', 'mlp.bn.1.num_batches_tracked', 'mlp.bn.1.running_mean', 'mlp.bn.1.running_var', 'mlp.bn.1.weight', 'mlp.bn.2.bias', 'mlp.bn.2.num_batches_tracked', 'mlp.bn.2.running_mean', 'mlp.bn.2.running_var', 'mlp.bn.2.weight', 'mlp.bn.3.bias', 'mlp.bn.3.num_batches_tracked', 'mlp.bn.3.running_mean', 'mlp.bn.3.running_var', 'mlp.bn.3.weight', 'mlp.layers.0.bias', 'mlp.layers.0.weight', 'mlp.layers.1.bias', 'mlp.layers.1.weight', 'mlp.layers.2.bias', 'mlp.layers.2.weight', 'mlp.layers.3.bias', 'mlp.layers.3.weight', 'mlp.layers.4.bias', 'mlp.layers.4.weight', 'num_bn.bias', 'num_bn.num_batches_tracked', 'num_bn.running_mean', 'num_bn.running_var', 'num_bn.weight']
You should probably TRAIN this mod

MLP layer sizes:
 Input: 773
 Hidden: [193, 48, 12, 3]
 Output: 2





  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 1
Training Loss: 0.67724234864622


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6577392589205351
 Val F1: 0.821570403399986


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8239546765840604


Epoch: 2:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 2
Training Loss: 0.6507544448553038


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6417799697090418
 Val F1: 0.8366634479685061


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8407231066805534


Epoch: 3:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 3
Training Loss: 0.627919442123837


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6218870603121244
 Val F1: 0.8362615207373271


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8393490054249548


Epoch: 4:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 4
Training Loss: 0.6077421299353776


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6038066557584665
 Val F1: 0.8387986070806733


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8437977446344125


Epoch: 5:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 5
Training Loss: 0.5899473663258705


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5925437552042496
 Val F1: 0.8392557460780737


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8424402018872065


Epoch: 6:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 6
Training Loss: 0.575363204553655


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5839907435270456
 Val F1: 0.8393606835088753


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8448593992176545


Epoch: 7:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 7
Training Loss: 0.5634575218536966


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5787076775271159
 Val F1: 0.8399970762371173


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8437099197478837


Epoch: 8:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 8
Training Loss: 0.5542895217435528


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5700571050819678
 Val F1: 0.8407643312101911


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8434425025701279


Epoch: 9:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 9
Training Loss: 0.5472901259216286


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5687533811880991
 Val F1: 0.8418698277790203


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.846844151959318


Epoch: 10:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 10
Training Loss: 0.543886335742813


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5763006118627695
 Val F1: 0.841025641025641


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8462026024383877
post_lda_10


Some weights of BertConcatFeatures were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'mlp.bn.0.bias', 'mlp.bn.0.num_batches_tracked', 'mlp.bn.0.running_mean', 'mlp.bn.0.running_var', 'mlp.bn.0.weight', 'mlp.bn.1.bias', 'mlp.bn.1.num_batches_tracked', 'mlp.bn.1.running_mean', 'mlp.bn.1.running_var', 'mlp.bn.1.weight', 'mlp.bn.2.bias', 'mlp.bn.2.num_batches_tracked', 'mlp.bn.2.running_mean', 'mlp.bn.2.running_var', 'mlp.bn.2.weight', 'mlp.bn.3.bias', 'mlp.bn.3.num_batches_tracked', 'mlp.bn.3.running_mean', 'mlp.bn.3.running_var', 'mlp.bn.3.weight', 'mlp.layers.0.bias', 'mlp.layers.0.weight', 'mlp.layers.1.bias', 'mlp.layers.1.weight', 'mlp.layers.2.bias', 'mlp.layers.2.weight', 'mlp.layers.3.bias', 'mlp.layers.3.weight', 'mlp.layers.4.bias', 'mlp.layers.4.weight', 'num_bn.bias', 'num_bn.num_batches_tracked', 'num_bn.running_mean', 'num_bn.running_var', 'num_bn.weight']
You should probably TRAIN this mod

MLP layer sizes:
 Input: 778
 Hidden: [194, 48, 12, 3]
 Output: 2





  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 1
Training Loss: 0.6759617915932906


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.662298280841265
 Val F1: 0.8301666179479685


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8349187765256842


Epoch: 2:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 2
Training Loss: 0.6515507011155233


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6391947999214515
 Val F1: 0.8362950605642998


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.839253899574592


Epoch: 3:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 3
Training Loss: 0.629119212360312


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6246583073949202
 Val F1: 0.8443032228778937


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8461363636363636


Epoch: 4:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 4
Training Loss: 0.6092493153417515


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6099175621683781
 Val F1: 0.8413782749042865


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8471013673861453


Epoch: 5:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 5
Training Loss: 0.591847658212082


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.598365789040541
 Val F1: 0.8424175009269558


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8456610800744878


Epoch: 6:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 6
Training Loss: 0.5768297915342644


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5863072562676209
 Val F1: 0.8457211430285758


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8461480646373544


Epoch: 7:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 7
Training Loss: 0.5645066828383997


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5829158413868684
 Val F1: 0.8454205750019065


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8467868488536763


Epoch: 8:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 8
Training Loss: 0.5548910776390955


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5797798289702489
 Val F1: 0.8445186759793502


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8473067559342667


Epoch: 9:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 9
Training Loss: 0.5479595939208872


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.57170737381929
 Val F1: 0.8444138774262572


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8469767263525172


Epoch: 10:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 10
Training Loss: 0.5444667240394376


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5747204688497078
 Val F1: 0.8448367704132106


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8466646313539918
post_lda_20


Some weights of BertConcatFeatures were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'mlp.bn.0.bias', 'mlp.bn.0.num_batches_tracked', 'mlp.bn.0.running_mean', 'mlp.bn.0.running_var', 'mlp.bn.0.weight', 'mlp.bn.1.bias', 'mlp.bn.1.num_batches_tracked', 'mlp.bn.1.running_mean', 'mlp.bn.1.running_var', 'mlp.bn.1.weight', 'mlp.bn.2.bias', 'mlp.bn.2.num_batches_tracked', 'mlp.bn.2.running_mean', 'mlp.bn.2.running_var', 'mlp.bn.2.weight', 'mlp.bn.3.bias', 'mlp.bn.3.num_batches_tracked', 'mlp.bn.3.running_mean', 'mlp.bn.3.running_var', 'mlp.bn.3.weight', 'mlp.layers.0.bias', 'mlp.layers.0.weight', 'mlp.layers.1.bias', 'mlp.layers.1.weight', 'mlp.layers.2.bias', 'mlp.layers.2.weight', 'mlp.layers.3.bias', 'mlp.layers.3.weight', 'mlp.layers.4.bias', 'mlp.layers.4.weight', 'num_bn.bias', 'num_bn.num_batches_tracked', 'num_bn.running_mean', 'num_bn.running_var', 'num_bn.weight']
You should probably TRAIN this mod

MLP layer sizes:
 Input: 788
 Hidden: [197, 49, 12, 3]
 Output: 2





  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 1
Training Loss: 0.6798315828442026


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.666028439043424
 Val F1: 0.831496752535941


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8350485366031677


Epoch: 2:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 2
Training Loss: 0.6549094739148549


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6455987760653863
 Val F1: 0.8419557416267942


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8424171949435463


Epoch: 3:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 3
Training Loss: 0.6322765096105273


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6282968584161538
 Val F1: 0.8422509225092252


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8405562617974498


Epoch: 4:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 4
Training Loss: 0.6119377363757106


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6127909840299532
 Val F1: 0.8434351030230111


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8434253767990029


Epoch: 5:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 5
Training Loss: 0.5937680132461317


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6039838718298154
 Val F1: 0.8434431844128167


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8453080858211804


Epoch: 6:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 6
Training Loss: 0.5783899115301032


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5958005111569014
 Val F1: 0.841932597673725


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8435933876879348


Epoch: 7:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 7
Training Loss: 0.5660665811914386


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5877855485066389
 Val F1: 0.8426472815386945


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8438639125151882


Epoch: 8:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 8
Training Loss: 0.5563474248537128


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5854217782616615
 Val F1: 0.8417707150964812


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8448917584504367


Epoch: 9:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 9
Training Loss: 0.5502881795250023


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5786650694715672
 Val F1: 0.84166792566853


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8446969696969697


Epoch: 10:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 10
Training Loss: 0.545946362206868


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5812509873738656
 Val F1: 0.8426124197002142


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8444018404907975
user_lda_2


Some weights of BertConcatFeatures were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'mlp.bn.0.bias', 'mlp.bn.0.num_batches_tracked', 'mlp.bn.0.running_mean', 'mlp.bn.0.running_var', 'mlp.bn.0.weight', 'mlp.bn.1.bias', 'mlp.bn.1.num_batches_tracked', 'mlp.bn.1.running_mean', 'mlp.bn.1.running_var', 'mlp.bn.1.weight', 'mlp.bn.2.bias', 'mlp.bn.2.num_batches_tracked', 'mlp.bn.2.running_mean', 'mlp.bn.2.running_var', 'mlp.bn.2.weight', 'mlp.bn.3.bias', 'mlp.bn.3.num_batches_tracked', 'mlp.bn.3.running_mean', 'mlp.bn.3.running_var', 'mlp.bn.3.weight', 'mlp.layers.0.bias', 'mlp.layers.0.weight', 'mlp.layers.1.bias', 'mlp.layers.1.weight', 'mlp.layers.2.bias', 'mlp.layers.2.weight', 'mlp.layers.3.bias', 'mlp.layers.3.weight', 'mlp.layers.4.bias', 'mlp.layers.4.weight', 'num_bn.bias', 'num_bn.num_batches_tracked', 'num_bn.running_mean', 'num_bn.running_var', 'num_bn.weight']
You should probably TRAIN this mod

MLP layer sizes:
 Input: 770
 Hidden: [192, 48, 12, 3]
 Output: 2





  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 1
Training Loss: 0.6615928839772419


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6441852702544286
 Val F1: 0.8323931116389549


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8325764907317799


Epoch: 2:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 2
Training Loss: 0.631826924938363


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6238360586456764
 Val F1: 0.8357990332503296


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8370604678534648


Epoch: 3:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 3
Training Loss: 0.609632649451248


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6004491402552679
 Val F1: 0.8377856930840011


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8399004716455603


Epoch: 4:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 4
Training Loss: 0.5904319078906947


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5845229600866636
 Val F1: 0.8435272045028143


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8425570363677434


Epoch: 5:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 5
Training Loss: 0.5738281850917718


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5784495475773628
 Val F1: 0.8380952380952381


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8388274376665572


Epoch: 6:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 6
Training Loss: 0.5597719240068185


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5693215970427562
 Val F1: 0.840493973230792


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8413128129056183


Epoch: 7:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 7
Training Loss: 0.5487862320751964


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.563611817570069
 Val F1: 0.8405011356143308


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8420085281576238


Epoch: 8:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 8
Training Loss: 0.539748504429481


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5571048019979244
 Val F1: 0.8394498786497022


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8423032941089675


Epoch: 9:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 9
Training Loss: 0.5332241980470135


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5585344425187662
 Val F1: 0.8406291734678736


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8427869584635999


Epoch: 10:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 10
Training Loss: 0.5296548967436983


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5611362419067285
 Val F1: 0.8406268480189236


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8422262597797472
user_lda_5


Some weights of BertConcatFeatures were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'mlp.bn.0.bias', 'mlp.bn.0.num_batches_tracked', 'mlp.bn.0.running_mean', 'mlp.bn.0.running_var', 'mlp.bn.0.weight', 'mlp.bn.1.bias', 'mlp.bn.1.num_batches_tracked', 'mlp.bn.1.running_mean', 'mlp.bn.1.running_var', 'mlp.bn.1.weight', 'mlp.bn.2.bias', 'mlp.bn.2.num_batches_tracked', 'mlp.bn.2.running_mean', 'mlp.bn.2.running_var', 'mlp.bn.2.weight', 'mlp.bn.3.bias', 'mlp.bn.3.num_batches_tracked', 'mlp.bn.3.running_mean', 'mlp.bn.3.running_var', 'mlp.bn.3.weight', 'mlp.layers.0.bias', 'mlp.layers.0.weight', 'mlp.layers.1.bias', 'mlp.layers.1.weight', 'mlp.layers.2.bias', 'mlp.layers.2.weight', 'mlp.layers.3.bias', 'mlp.layers.3.weight', 'mlp.layers.4.bias', 'mlp.layers.4.weight', 'num_bn.bias', 'num_bn.num_batches_tracked', 'num_bn.running_mean', 'num_bn.running_var', 'num_bn.weight']
You should probably TRAIN this mod

MLP layer sizes:
 Input: 773
 Hidden: [193, 48, 12, 3]
 Output: 2





  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 1
Training Loss: 0.6761711922887271


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6610784929914352
 Val F1: 0.8269163088618288


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8299655002464268


Epoch: 2:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 2
Training Loss: 0.649681993205138


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6373084709048271
 Val F1: 0.8367258553486359


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8391244672397601


Epoch: 3:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 3
Training Loss: 0.6266506328781993


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6173511653756484
 Val F1: 0.836849710982659


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8408384531984099


Epoch: 4:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 4
Training Loss: 0.6063292842047054


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6057008940439957
 Val F1: 0.8427757793764988


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8439796071375019


Epoch: 5:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 5
Training Loss: 0.5893126291841186


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.594439246906684
 Val F1: 0.8413610251878038


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8433850348156062


Epoch: 6:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 6
Training Loss: 0.5740835499446035


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.584826450699415
 Val F1: 0.8420041383387525


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8420546932742055


Epoch: 7:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 7
Training Loss: 0.5623684517279801


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5788983800090276
 Val F1: 0.8392567467925085


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8456534594914251


Epoch: 8:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 8
Training Loss: 0.5533156042201898


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.572722468811732
 Val F1: 0.8402854410358274


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8433663877317802


Epoch: 9:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 9
Training Loss: 0.5465817855956688


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5710618760532293
 Val F1: 0.8416753304619041


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8445717261129914


Epoch: 10:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 10
Training Loss: 0.5430931414317604


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5730660163248197
 Val F1: 0.8417154284867787


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8438461823710461
user_lda_10


Some weights of BertConcatFeatures were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'mlp.bn.0.bias', 'mlp.bn.0.num_batches_tracked', 'mlp.bn.0.running_mean', 'mlp.bn.0.running_var', 'mlp.bn.0.weight', 'mlp.bn.1.bias', 'mlp.bn.1.num_batches_tracked', 'mlp.bn.1.running_mean', 'mlp.bn.1.running_var', 'mlp.bn.1.weight', 'mlp.bn.2.bias', 'mlp.bn.2.num_batches_tracked', 'mlp.bn.2.running_mean', 'mlp.bn.2.running_var', 'mlp.bn.2.weight', 'mlp.bn.3.bias', 'mlp.bn.3.num_batches_tracked', 'mlp.bn.3.running_mean', 'mlp.bn.3.running_var', 'mlp.bn.3.weight', 'mlp.layers.0.bias', 'mlp.layers.0.weight', 'mlp.layers.1.bias', 'mlp.layers.1.weight', 'mlp.layers.2.bias', 'mlp.layers.2.weight', 'mlp.layers.3.bias', 'mlp.layers.3.weight', 'mlp.layers.4.bias', 'mlp.layers.4.weight', 'num_bn.bias', 'num_bn.num_batches_tracked', 'num_bn.running_mean', 'num_bn.running_var', 'num_bn.weight']
You should probably TRAIN this mod

MLP layer sizes:
 Input: 778
 Hidden: [194, 48, 12, 3]
 Output: 2





  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 1
Training Loss: 0.6825436671684377


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6744395779111446
 Val F1: 0.8296771372348954


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.832273159318564


Epoch: 2:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 2
Training Loss: 0.6610894661030713


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6536597691667385
 Val F1: 0.8372058049477406


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8390765638534251


Epoch: 3:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 3
Training Loss: 0.642340354628252


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.641761509462809
 Val F1: 0.841048357327427


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8413645741231948


Epoch: 4:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 4
Training Loss: 0.6251432668790563


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6287854554561468
 Val F1: 0.8397684812074145


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8433222006092415


Epoch: 5:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 5
Training Loss: 0.609654939125392


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6217900722836837
 Val F1: 0.8426190118636432


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8417442430181284


Epoch: 6:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 6
Training Loss: 0.5974780485321339


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6160393468080423
 Val F1: 0.8417179756720228


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8436453906044308


Epoch: 7:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 7
Training Loss: 0.5858797348892065


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6159052934784156
 Val F1: 0.8408041697691735


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8437813906767366


Epoch: 8:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 8
Training Loss: 0.578394889831543


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6118072149081107
 Val F1: 0.8408852998355018


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8433825181634333


Epoch: 9:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 9
Training Loss: 0.5726048530680636


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6072301254249536
 Val F1: 0.8421372191863996


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8420812240703383


Epoch: 10:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 10
Training Loss: 0.568785524154826


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6094472578320748
 Val F1: 0.8421530087692773


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8434420015163001
user_lda_20


Some weights of BertConcatFeatures were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'mlp.bn.0.bias', 'mlp.bn.0.num_batches_tracked', 'mlp.bn.0.running_mean', 'mlp.bn.0.running_var', 'mlp.bn.0.weight', 'mlp.bn.1.bias', 'mlp.bn.1.num_batches_tracked', 'mlp.bn.1.running_mean', 'mlp.bn.1.running_var', 'mlp.bn.1.weight', 'mlp.bn.2.bias', 'mlp.bn.2.num_batches_tracked', 'mlp.bn.2.running_mean', 'mlp.bn.2.running_var', 'mlp.bn.2.weight', 'mlp.bn.3.bias', 'mlp.bn.3.num_batches_tracked', 'mlp.bn.3.running_mean', 'mlp.bn.3.running_var', 'mlp.bn.3.weight', 'mlp.layers.0.bias', 'mlp.layers.0.weight', 'mlp.layers.1.bias', 'mlp.layers.1.weight', 'mlp.layers.2.bias', 'mlp.layers.2.weight', 'mlp.layers.3.bias', 'mlp.layers.3.weight', 'mlp.layers.4.bias', 'mlp.layers.4.weight', 'num_bn.bias', 'num_bn.num_batches_tracked', 'num_bn.running_mean', 'num_bn.running_var', 'num_bn.weight']
You should probably TRAIN this mod

MLP layer sizes:
 Input: 788
 Hidden: [197, 49, 12, 3]
 Output: 2





  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 1
Training Loss: 0.680627444148392


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6670398908929948
 Val F1: 0.8290699393760166


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8331299685709004


Epoch: 2:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 2
Training Loss: 0.6586345906264197


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6485426513812481
 Val F1: 0.8371155568826516


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8374789601645782


Epoch: 3:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 3
Training Loss: 0.639625090076028


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6379208071873739
 Val F1: 0.840080518899575


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.841397347281898


Epoch: 4:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 4
Training Loss: 0.6223565165989984


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6324878518397992
 Val F1: 0.8397733127253992


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8435464772054471


Epoch: 5:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 5
Training Loss: 0.6072331758351146


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6244328539723005
 Val F1: 0.8407860585836114


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8426146515783205


Epoch: 6:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 6
Training Loss: 0.5949111388460883


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6188745939960847
 Val F1: 0.8384765919012495


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8418390457496604


Epoch: 7:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 7
Training Loss: 0.5834718445115838


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.61494630193099
 Val F1: 0.8402933692560994


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.842200180342651


Epoch: 8:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 8
Training Loss: 0.5762531768575078


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6104179381942137
 Val F1: 0.8394177273073697


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8417994878263196


Epoch: 9:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 9
Training Loss: 0.5700733564773539


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6103748348660958
 Val F1: 0.8386950268715464


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8423252279635257


Epoch: 10:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 10
Training Loss: 0.5670834448876131


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6085203091303507
 Val F1: 0.8390509294242103


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8423807898031181


## Эмбеддинги, полученные с помощью Word2Vec / GloVe

In [None]:
# Experiment names for the Word2Vec / GloVe runs. Each name is a key of
# `knowledge_dct`, and the number after the last underscore is parsed by the
# training loop as the embedding dimension.
keys = [
    'glove_100',
    'glove_200',
    'w2v_300',
]

In [None]:
for key in keys:
  print(key)

  # Pick the embedding dictionary for this experiment; the number after the
  # last underscore in the key is used as the embedding dimension.
  tmp_dct = knowledge_dct[key]
  user_embedding_dim = int(key.rpartition('_')[2])

  # Build the dataloaders so that they take the user embeddings into account.
  dataloader_train, dataloader_val, dataloader_test = get_dataloaders(
      tokenizer,
      'tweet',
      knowledge_dct=tmp_dct,
      user_embedding_dim=user_embedding_dim,
  )

  # Prepare the model: start from the stock BERT config and attach the sizes
  # of the text and numerical feature parts.
  config = BertConfig.from_pretrained('bert-base-uncased', num_labels=2)
  config.text_feat_dim = 768  # because BERT's hidden layer size is 768
  config.numerical_feat_dim = user_embedding_dim

  model = BertConcatFeatures.from_pretrained('bert-base-uncased', config=config)
  # Kept as a separate statement (not chained onto from_pretrained) so that
  # `model` is already rebound to the new model before it is moved to `device`.
  model.to(device)

  # Train for 10 epochs this time.
  train_validate(
      model,
      dataloader_train,
      dataloader_val,
      dataloader_test,
      res_dct,
      key,
      num_epochs=10,
      with_additional_embeddings=True,
  )

glove_100


Some weights of BertConcatFeatures were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'mlp.bn.0.bias', 'mlp.bn.0.num_batches_tracked', 'mlp.bn.0.running_mean', 'mlp.bn.0.running_var', 'mlp.bn.0.weight', 'mlp.bn.1.bias', 'mlp.bn.1.num_batches_tracked', 'mlp.bn.1.running_mean', 'mlp.bn.1.running_var', 'mlp.bn.1.weight', 'mlp.bn.2.bias', 'mlp.bn.2.num_batches_tracked', 'mlp.bn.2.running_mean', 'mlp.bn.2.running_var', 'mlp.bn.2.weight', 'mlp.bn.3.bias', 'mlp.bn.3.num_batches_tracked', 'mlp.bn.3.running_mean', 'mlp.bn.3.running_var', 'mlp.bn.3.weight', 'mlp.layers.0.bias', 'mlp.layers.0.weight', 'mlp.layers.1.bias', 'mlp.layers.1.weight', 'mlp.layers.2.bias', 'mlp.layers.2.weight', 'mlp.layers.3.bias', 'mlp.layers.3.weight', 'mlp.layers.4.bias', 'mlp.layers.4.weight', 'num_bn.bias', 'num_bn.num_batches_tracked', 'num_bn.running_mean', 'num_bn.running_var', 'num_bn.weight']
You should probably TRAIN this mod

MLP layer sizes:
 Input: 868
 Hidden: [217, 54, 13, 3]
 Output: 2





  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 1
Training Loss: 0.6814521708764321


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6660835687548686
 Val F1: 0.8393619411368233


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8398067343346193


Epoch: 2:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 2
Training Loss: 0.6535171278306823


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.642041879777725
 Val F1: 0.8365902010761824


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8375977633860636


Epoch: 3:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 3
Training Loss: 0.6288488347175692


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.625785958690521
 Val F1: 0.8426227138003326


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.845373523657706


Epoch: 4:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 4
Training Loss: 0.6073412475397655


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6066601373828374
 Val F1: 0.8474353353792197


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8474588692242864


Epoch: 5:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 5
Training Loss: 0.5889186390886841


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.598807488878568
 Val F1: 0.8434192276808525


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8500073131490419


Epoch: 6:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 6
Training Loss: 0.573544109902106


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5799835494313484
 Val F1: 0.8503748125937032


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8526256352343308


Epoch: 7:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 7
Training Loss: 0.5608097783078613


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5792740883353429
 Val F1: 0.8469026548672567


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.849430870193912


Epoch: 8:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 8
Training Loss: 0.5509170430641419


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5718275415591705
 Val F1: 0.8460465457749065


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8497974217311234


Epoch: 9:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 9
Training Loss: 0.5442837980961559


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5744279961173351
 Val F1: 0.8474347434743474


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8511087645195354


Epoch: 10:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 10
Training Loss: 0.5407370718741658


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5739559559867933
 Val F1: 0.8459661317754936


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.850141096093866
glove_200


Some weights of BertConcatFeatures were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'mlp.bn.0.bias', 'mlp.bn.0.num_batches_tracked', 'mlp.bn.0.running_mean', 'mlp.bn.0.running_var', 'mlp.bn.0.weight', 'mlp.bn.1.bias', 'mlp.bn.1.num_batches_tracked', 'mlp.bn.1.running_mean', 'mlp.bn.1.running_var', 'mlp.bn.1.weight', 'mlp.bn.2.bias', 'mlp.bn.2.num_batches_tracked', 'mlp.bn.2.running_mean', 'mlp.bn.2.running_var', 'mlp.bn.2.weight', 'mlp.bn.3.bias', 'mlp.bn.3.num_batches_tracked', 'mlp.bn.3.running_mean', 'mlp.bn.3.running_var', 'mlp.bn.3.weight', 'mlp.layers.0.bias', 'mlp.layers.0.weight', 'mlp.layers.1.bias', 'mlp.layers.1.weight', 'mlp.layers.2.bias', 'mlp.layers.2.weight', 'mlp.layers.3.bias', 'mlp.layers.3.weight', 'mlp.layers.4.bias', 'mlp.layers.4.weight', 'num_bn.bias', 'num_bn.num_batches_tracked', 'num_bn.running_mean', 'num_bn.running_var', 'num_bn.weight']
You should probably TRAIN this mod

MLP layer sizes:
 Input: 968
 Hidden: [242, 60, 15, 3]
 Output: 2





  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 1
Training Loss: 0.6712957488986963


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6603769543461311
 Val F1: 0.8293835469320541


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8323181655320675


Epoch: 2:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 2
Training Loss: 0.6436073335898481


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6313607029807873
 Val F1: 0.841396292004635


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8426982522028023


Epoch: 3:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 3
Training Loss: 0.6200218417701423


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6166792926497948
 Val F1: 0.8465896549186962


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8475212542784587


Epoch: 4:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 4
Training Loss: 0.5995149163065754


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5960948385107212
 Val F1: 0.8434651546687719


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8460766030698609


Epoch: 5:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 5
Training Loss: 0.5813086484417771


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5886622631014922
 Val F1: 0.8507295173961841


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8533861267789773


Epoch: 6:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 6
Training Loss: 0.5660742892925623


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5764235638273068
 Val F1: 0.8492057628370889


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8499907834101381


Epoch: 7:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 7
Training Loss: 0.553838930346749


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5712010245292615
 Val F1: 0.8479411764705882


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8503165954940362


Epoch: 8:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 8
Training Loss: 0.5444029891458096


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5636925027729609
 Val F1: 0.8515764425936942


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8507806691449814


Epoch: 9:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 9
Training Loss: 0.5379897786648149


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5647817602715431
 Val F1: 0.8511530398322851


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8497921581844736


Epoch: 10:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 10
Training Loss: 0.5335379449926022


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5647270035667297
 Val F1: 0.8498038344807166


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.849818746763335
w2v_300


Some weights of BertConcatFeatures were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'mlp.bn.0.bias', 'mlp.bn.0.num_batches_tracked', 'mlp.bn.0.running_mean', 'mlp.bn.0.running_var', 'mlp.bn.0.weight', 'mlp.bn.1.bias', 'mlp.bn.1.num_batches_tracked', 'mlp.bn.1.running_mean', 'mlp.bn.1.running_var', 'mlp.bn.1.weight', 'mlp.bn.2.bias', 'mlp.bn.2.num_batches_tracked', 'mlp.bn.2.running_mean', 'mlp.bn.2.running_var', 'mlp.bn.2.weight', 'mlp.bn.3.bias', 'mlp.bn.3.num_batches_tracked', 'mlp.bn.3.running_mean', 'mlp.bn.3.running_var', 'mlp.bn.3.weight', 'mlp.layers.0.bias', 'mlp.layers.0.weight', 'mlp.layers.1.bias', 'mlp.layers.1.weight', 'mlp.layers.2.bias', 'mlp.layers.2.weight', 'mlp.layers.3.bias', 'mlp.layers.3.weight', 'mlp.layers.4.bias', 'mlp.layers.4.weight', 'num_bn.bias', 'num_bn.num_batches_tracked', 'num_bn.running_mean', 'num_bn.running_var', 'num_bn.weight']
You should probably TRAIN this mod

MLP layer sizes:
 Input: 1068
 Hidden: [267, 66, 16, 4]
 Output: 2





  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 1
Training Loss: 0.674554580335118


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6570249929642066
 Val F1: 0.8361532899493854


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8392870081878124


Epoch: 2:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 2
Training Loss: 0.6405901317754068


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6288091504038908
 Val F1: 0.8403677694925071


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8438533376939569


Epoch: 3:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 3
Training Loss: 0.6108359674503214


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6039319794911605
 Val F1: 0.8482789855072465


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8516793517361505


Epoch: 4:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 4
Training Loss: 0.5845462774551714


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5897701392189051
 Val F1: 0.845795095531819


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8502357156326606


Epoch: 5:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 5
Training Loss: 0.5627342458982003


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5757502533304386
 Val F1: 0.8476669716376944


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8512219413161725


Epoch: 6:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 6
Training Loss: 0.5439742989667081


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5589497133325307
 Val F1: 0.8453701607425854


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8494871891912349


Epoch: 7:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 7
Training Loss: 0.528617912853826


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5587156346211066
 Val F1: 0.8437522955997943


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8473000885216879


Epoch: 8:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 8
Training Loss: 0.5171795649140991


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5572455859719179
 Val F1: 0.8468386902067299


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8469519121544868


Epoch: 9:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 9
Training Loss: 0.5087436525180211


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5497111717286782
 Val F1: 0.8468982071862575


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8482815797407295


Epoch: 10:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 10
Training Loss: 0.503768394076025


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5533126458907739
 Val F1: 0.8461712903469649


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8467435858509186


## Эмбеддинги, полученные с помощью BERT

In [None]:
# Label of this run: it selects the embedding set inside knowledge_dct and is
# also handed to train_validate together with res_dct.
key = 'bert'
user_embedding_dim = 768

print(key)

# Pick the dictionary that corresponds to this key.
tmp_dct = knowledge_dct[key]

# Build the train/val/test dataloaders over the plain 'tweet' column,
# taking the per-user embeddings into account.
dataloader_train, dataloader_val, dataloader_test = get_dataloaders(
    tokenizer,
    'tweet',
    knowledge_dct=tmp_dct,
    user_embedding_dim=user_embedding_dim,
)

# Prepare the model: bert-base-uncased config with two labels, extended with
# the sizes of the two feature groups.
config = BertConfig.from_pretrained('bert-base-uncased', num_labels=2)
config.numerical_feat_dim = user_embedding_dim
config.text_feat_dim = 768  # BERT's hidden layer has dimensionality 768

# Load the checkpoint into the custom model class and move it to the device.
model = BertConcatFeatures.from_pretrained('bert-base-uncased', config=config).to(device)

# This configuration is trained for 10 epochs.
train_validate(
    model, dataloader_train, dataloader_val, dataloader_test, res_dct, key,
    num_epochs=10,
    with_additional_embeddings=True,
)

bert


Some weights of BertConcatFeatures were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'mlp.bn.0.bias', 'mlp.bn.0.num_batches_tracked', 'mlp.bn.0.running_mean', 'mlp.bn.0.running_var', 'mlp.bn.0.weight', 'mlp.bn.1.bias', 'mlp.bn.1.num_batches_tracked', 'mlp.bn.1.running_mean', 'mlp.bn.1.running_var', 'mlp.bn.1.weight', 'mlp.bn.2.bias', 'mlp.bn.2.num_batches_tracked', 'mlp.bn.2.running_mean', 'mlp.bn.2.running_var', 'mlp.bn.2.weight', 'mlp.bn.3.bias', 'mlp.bn.3.num_batches_tracked', 'mlp.bn.3.running_mean', 'mlp.bn.3.running_var', 'mlp.bn.3.weight', 'mlp.layers.0.bias', 'mlp.layers.0.weight', 'mlp.layers.1.bias', 'mlp.layers.1.weight', 'mlp.layers.2.bias', 'mlp.layers.2.weight', 'mlp.layers.3.bias', 'mlp.layers.3.weight', 'mlp.layers.4.bias', 'mlp.layers.4.weight', 'num_bn.bias', 'num_bn.num_batches_tracked', 'num_bn.running_mean', 'num_bn.running_var', 'num_bn.weight']
You should probably TRAIN this mod

MLP layer sizes:
 Input: 1536
 Hidden: [384, 96, 24, 6]
 Output: 2





  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 1
Training Loss: 0.6570371761302317


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6301431757135268
 Val F1: 0.8297436621707506


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8338287331917905


Epoch: 2:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 2
Training Loss: 0.6069482757587625


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5963798104188381
 Val F1: 0.8475908080059303


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8508631891590867


Epoch: 3:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 3
Training Loss: 0.5686863575975648


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5654413565420188
 Val F1: 0.8453397562221735


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8505931739368499


Epoch: 4:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 4
Training Loss: 0.5384932892025885


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5434355653631382
 Val F1: 0.8494152046783626


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.853022983457766


Epoch: 5:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 5
Training Loss: 0.5133994104284229


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5274880388035224
 Val F1: 0.8500848020057516


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8528868189546832


Epoch: 6:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 6
Training Loss: 0.4933578334782953


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5177547622185487
 Val F1: 0.8472120591248353


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8507632050953549


Epoch: 7:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 7
Training Loss: 0.476911777731363


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5101093569627175
 Val F1: 0.850965824665676


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8537811371337081


Epoch: 8:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 8
Training Loss: 0.46415998203093245


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5065845648447672
 Val F1: 0.8523719165085388


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8524240809802878


Epoch: 9:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 9
Training Loss: 0.45506218884437866


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5092282136663412
 Val F1: 0.8493047158403869


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8507638072855465


Epoch: 10:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 10
Training Loss: 0.44965921157647754


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5084861872287897
 Val F1: 0.8515774196012529


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.851513990034496


# BERT, где в текст добавлена информация об авторе + эмбеддинги пользователей, полученные с помощью BERT

## 1. {tweet} [SEP] The author of the text is {username}

In [None]:
# New text column: the tweet, a [SEP] marker, then a short sentence naming the
# author. Built column-wise instead of row-by-row; the resulting strings and
# their order are the same.
df['tweet_author_is'] = [
    f'{tweet} [SEP] The author of the text is {user}'
    for tweet, user in zip(df['tweet'], df['user'])
]

In [None]:
# Two separate keys are needed here: one labels this experiment when it is
# passed to train_validate together with res_dct, the other selects which
# user-embedding dictionary to read from knowledge_dct.
key_for_res_dct = 'tweet_author_is_bert'
key_for_knowledge_dct = 'bert'
user_embedding_dim = 768

# Print this experiment's own label. The cell used to print `key`, a variable
# it never defines (it only exists if an earlier cell happened to set it), so
# the header could be wrong or raise NameError on a fresh kernel.
print(key_for_res_dct)

# Pick the corresponding embedding dictionary.
tmp_dct = knowledge_dct[key_for_knowledge_dct]

# Build the dataloaders over the augmented text column, taking the per-user
# embeddings into account.
dataloader_train, dataloader_val, dataloader_test = get_dataloaders(tokenizer, 'tweet_author_is',
                                                                    knowledge_dct=tmp_dct,
                                                                    user_embedding_dim=user_embedding_dim)

# Prepare the model.
config = BertConfig.from_pretrained(
    'bert-base-uncased',
    num_labels=2
    )

config.numerical_feat_dim = user_embedding_dim
config.text_feat_dim = 768 # BERT's hidden layer has dimensionality 768

model = BertConcatFeatures.from_pretrained(
    'bert-base-uncased',
    config=config
    )

model.to(device)

# This configuration is trained for 10 epochs.
train_validate(model, dataloader_train, dataloader_val, dataloader_test, res_dct, key_for_res_dct,
               num_epochs=10, with_additional_embeddings=True)

tweet_author_is_bert


Some weights of BertConcatFeatures were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'mlp.bn.0.bias', 'mlp.bn.0.num_batches_tracked', 'mlp.bn.0.running_mean', 'mlp.bn.0.running_var', 'mlp.bn.0.weight', 'mlp.bn.1.bias', 'mlp.bn.1.num_batches_tracked', 'mlp.bn.1.running_mean', 'mlp.bn.1.running_var', 'mlp.bn.1.weight', 'mlp.bn.2.bias', 'mlp.bn.2.num_batches_tracked', 'mlp.bn.2.running_mean', 'mlp.bn.2.running_var', 'mlp.bn.2.weight', 'mlp.bn.3.bias', 'mlp.bn.3.num_batches_tracked', 'mlp.bn.3.running_mean', 'mlp.bn.3.running_var', 'mlp.bn.3.weight', 'mlp.layers.0.bias', 'mlp.layers.0.weight', 'mlp.layers.1.bias', 'mlp.layers.1.weight', 'mlp.layers.2.bias', 'mlp.layers.2.weight', 'mlp.layers.3.bias', 'mlp.layers.3.weight', 'mlp.layers.4.bias', 'mlp.layers.4.weight', 'num_bn.bias', 'num_bn.num_batches_tracked', 'num_bn.running_mean', 'num_bn.running_var', 'num_bn.weight']
You should probably TRAIN this mod

MLP layer sizes:
 Input: 1536
 Hidden: [384, 96, 24, 6]
 Output: 2





  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 1
Training Loss: 0.6597207818313735


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6327155910623379
 Val F1: 0.8514068214045825


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8547308654887751


Epoch: 2:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 2
Training Loss: 0.6096760523330191


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5908254390725722
 Val F1: 0.8644980107652703


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8641226125179913


Epoch: 3:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 3
Training Loss: 0.570771825215048


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5666044236948857
 Val F1: 0.8650316211630418


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8682941879748544


Epoch: 4:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 4
Training Loss: 0.539156774570353


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5425200414581176
 Val F1: 0.865873076048222


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8703100128856212


Epoch: 5:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 5
Training Loss: 0.5126381552854298


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5311524739059118
 Val F1: 0.8667731872479647


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8718026565464896


Epoch: 6:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 6
Training Loss: 0.49020106764809596


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5160216573530283
 Val F1: 0.8680329141437666


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.872897775711075


Epoch: 7:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 7
Training Loss: 0.4722744097148091


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5105562722071623
 Val F1: 0.8687373185578273


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8730560860583858


Epoch: 8:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 8
Training Loss: 0.45801730381446326


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5097685588093904
 Val F1: 0.8670975878921645


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8727831638685268


Epoch: 9:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 9
Training Loss: 0.4483252911801947


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5040041820552105
 Val F1: 0.8667867491581174


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8730388512852616


Epoch: 10:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 10
Training Loss: 0.44327417132337776


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5007401148860271
 Val F1: 0.8679571923197986


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8728886793935108


## 2. {tweet} [SEP] {username}

In [None]:
# New text column: the tweet, a [SEP] marker, then the bare username.
# Built column-wise instead of row-by-row; the resulting strings and their
# order are the same.
df['tweet_author'] = [
    f'{tweet} [SEP] {user}'
    for tweet, user in zip(df['tweet'], df['user'])
]

In [None]:
# Two separate keys are needed here: one labels this experiment when it is
# passed to train_validate together with res_dct, the other selects which
# user-embedding dictionary to read from knowledge_dct.
key_for_res_dct = 'tweet_author_bert'
key_for_knowledge_dct = 'bert'
user_embedding_dim = 768

# Print this experiment's own label. The cell used to print `key`, a variable
# it never defines (it only exists if an earlier cell happened to set it), so
# the header could show another experiment's name or raise NameError on a
# fresh kernel.
print(key_for_res_dct)

# Pick the corresponding embedding dictionary.
tmp_dct = knowledge_dct[key_for_knowledge_dct]

# Build the dataloaders over the augmented text column, taking the per-user
# embeddings into account.
dataloader_train, dataloader_val, dataloader_test = get_dataloaders(tokenizer, 'tweet_author',
                                                                    knowledge_dct=tmp_dct,
                                                                    user_embedding_dim=user_embedding_dim)

# Prepare the model.
config = BertConfig.from_pretrained(
    'bert-base-uncased',
    num_labels=2
    )

config.numerical_feat_dim = user_embedding_dim
config.text_feat_dim = 768 # BERT's hidden layer has dimensionality 768

model = BertConcatFeatures.from_pretrained(
    'bert-base-uncased',
    config=config
    )

model.to(device)

# This configuration is trained for 10 epochs.
# NOTE(review): the previous comment here spoke of increasing the learning
# rate because of the MLP, but no learning-rate argument is passed in this
# call, so whatever train_validate uses by default applies. Confirm which was
# intended.
train_validate(model, dataloader_train, dataloader_val, dataloader_test, res_dct, key_for_res_dct,
               num_epochs=10, with_additional_embeddings=True)

tweet_author_is_bert


Some weights of BertConcatFeatures were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight', 'mlp.bn.0.bias', 'mlp.bn.0.num_batches_tracked', 'mlp.bn.0.running_mean', 'mlp.bn.0.running_var', 'mlp.bn.0.weight', 'mlp.bn.1.bias', 'mlp.bn.1.num_batches_tracked', 'mlp.bn.1.running_mean', 'mlp.bn.1.running_var', 'mlp.bn.1.weight', 'mlp.bn.2.bias', 'mlp.bn.2.num_batches_tracked', 'mlp.bn.2.running_mean', 'mlp.bn.2.running_var', 'mlp.bn.2.weight', 'mlp.bn.3.bias', 'mlp.bn.3.num_batches_tracked', 'mlp.bn.3.running_mean', 'mlp.bn.3.running_var', 'mlp.bn.3.weight', 'mlp.layers.0.bias', 'mlp.layers.0.weight', 'mlp.layers.1.bias', 'mlp.layers.1.weight', 'mlp.layers.2.bias', 'mlp.layers.2.weight', 'mlp.layers.3.bias', 'mlp.layers.3.weight', 'mlp.layers.4.bias', 'mlp.layers.4.weight', 'num_bn.bias', 'num_bn.num_batches_tracked', 'num_bn.running_mean', 'num_bn.running_var', 'num_bn.weight']
You should probably TRAIN this mod

MLP layer sizes:
 Input: 1536
 Hidden: [384, 96, 24, 6]
 Output: 2





  0%|          | 0/10 [00:00<?, ?it/s]

Epoch: 1:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 1
Training Loss: 0.6543986937176536


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.6204922657746536
 Val F1: 0.8503411675511751


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8514559806624618


Epoch: 2:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 2
Training Loss: 0.6039462850606145


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5831254581228281
 Val F1: 0.8567434331542932


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8579497524298552


Epoch: 3:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 3
Training Loss: 0.5639188966420509


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5552590842812489
 Val F1: 0.8686405018743784


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8678222357971901


Epoch: 4:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 4
Training Loss: 0.5303591283720716


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5365203683957075
 Val F1: 0.8698538127222443


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8676784443917477


Epoch: 5:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 5
Training Loss: 0.5025029784774868


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5137235666505802
 Val F1: 0.8703982265853852


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8674413089874318


Epoch: 6:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 6
Training Loss: 0.4782101658367038


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.5048230772790236
 Val F1: 0.8709049879386819


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8674136321195144


Epoch: 7:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 7
Training Loss: 0.45927923519585084


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.49500315483563984
 Val F1: 0.8709451575262545


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8704832136619445


Epoch: 8:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 8
Training Loss: 0.4450793907771535


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.49323835281225353
 Val F1: 0.8692434857907582


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8691717489671453


Epoch: 9:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 9
Training Loss: 0.4349160470937347


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.48569242455638373
 Val F1: 0.8707161427558497


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8683061184339956


Epoch: 10:   0%|          | 0/2178 [00:00<?, ?it/s]


Epoch 10
Training Loss: 0.4281331981344985


  0%|          | 0/312 [00:00<?, ?it/s]

Val Loss:0.48966694384431225
 Val F1: 0.8695855771482088


  0%|          | 0/623 [00:00<?, ?it/s]

Test F1: 0.8694058942772493


In [None]:
# Save the results dictionary to 'res.pickle' in the current working directory.
with open('res.pickle', 'wb') as results_file:
    pickle.dump(res_dct, results_file)