In [1]:
import numpy as np
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
import pandas as pd
from transformers import Trainer, TrainingArguments, BertTokenizer, BertForSequenceClassification
from datasets import Dataset
import torch
from torch.nn import CrossEntropyLoss

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def balance_dataset(df, target_col):
    a, b = sorted(dict(df[target_col].value_counts()).items())
    if (a[1] > b[1]):
        tmp = df.loc[df[target_col] == 1].sample(n = abs(a[1] - b[1]))
    else:
        tmp = df.loc[df[target_col] == 0].sample(n = abs(a[1] - b[1]))
    df = pd.concat([df, tmp], ignore_index=True)
    return df

def remove_some(s, lst = ['игра', 'это', 'всё', 'который', 'весь']):
    for i in lst:
        s = s.replace(i, '')
    return s

def drop_trash(df):
    df = df.dropna()
    empty = df[(df.text == '') | (df.text == ' ')]
    df = df.drop(empty.index)
    df = df.drop(df[df.text.str.split().apply(len) == 1].text.index)
    df.text = df.text.apply(remove_some)
    df = df.drop_duplicates(subset='text', keep="last")
    return df

In [3]:
file_path = "cleaned_bin_df_politics.csv"
df = pd.read_csv(file_path, index_col=0)
df = balance_dataset(drop_trash(df), 'label')
df.head()

Unnamed: 0,text,label,assessment
0,многий прохождение прочитать титр узнать имя...,1.0,1.0
1,узнать лента новость человек следить отечеств...,1.0,0.0
2,пройти российский студия разработчик описывать...,1.0,0.0
3,купить подборка порыв поддержать отечественны...,1.0,0.0
4,совет проверить товарищ третий день купить под...,1.0,1.0


In [4]:
df.label.value_counts()

label
1.0    7081
0.0    7081
Name: count, dtype: int64

In [5]:
X = df['text']
y = df['label'].astype(int)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X

0        многий  прохождение  прочитать титр узнать имя...
1        узнать  лента новость человек следить отечеств...
2        пройти российский студия разработчик описывать...
3        купить  подборка порыв поддержать отечественны...
4        совет проверить товарищ третий день купить под...
                               ...                        
14157    ваш флип керкоп похожий чорт сатанист просто п...
14158    история нс год назад выбрать ноутбук город ост...
14159    россия рулить флаг азербайджан алый сердце фла...
14160    проходить  наверное переставать надоедать  пог...
14161    слушать неплохой идея обзор посветить обзор се...
Name: text, Length: 14162, dtype: object

In [6]:
model_name = "cointegrated/rubert-tiny2"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForSequenceClassification.from_pretrained(model_name, num_labels=2)

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at cointegrated/rubert-tiny2 and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [9]:
from safetensors import safe_open
tensors = {}
with safe_open(r"C:\Users\zlata\Documents\parser_ign\models\my_model_v2\model.safetensors", framework="pt", device='cpu') as f:
    for k in f.keys():
        tensors[k] = f.get_tensor(k)

In [10]:
model.load_state_dict(tensors)
model.eval()

BertForSequenceClassification(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(83828, 312, padding_idx=0)
      (position_embeddings): Embedding(2048, 312)
      (token_type_embeddings): Embedding(2, 312)
      (LayerNorm): LayerNorm((312,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-2): 3 x BertLayer(
          (attention): BertAttention(
            (self): BertSdpaSelfAttention(
              (query): Linear(in_features=312, out_features=312, bias=True)
              (key): Linear(in_features=312, out_features=312, bias=True)
              (value): Linear(in_features=312, out_features=312, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=312, out_features=312, bias=True)
              (LayerNorm): LayerNorm((312,), eps=1e-

In [11]:
train_data = Dataset.from_pandas(pd.DataFrame({'text': X_train, 'label': y_train}))
test_data = Dataset.from_pandas(pd.DataFrame({'text': X_test, 'label': y_test}))

def tokenize(batch):
    return tokenizer(batch['text'], padding=True, truncation=True, max_length=512)

train_data = train_data.map(tokenize, batched=True, batch_size=len(train_data))
test_data = test_data.map(tokenize, batched=True, batch_size=len(test_data))

train_data.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])
test_data.set_format('torch', columns=['input_ids', 'attention_mask', 'label'])

Map: 100%|██████████| 11329/11329 [00:23<00:00, 488.97 examples/s]
Map: 100%|██████████| 2833/2833 [00:06<00:00, 469.61 examples/s]


In [19]:
training_args = TrainingArguments(
    output_dir='./results_new_v2',
    evaluation_strategy='epoch',
    save_strategy='epoch',
    logging_strategy='steps',
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=1,
    load_best_model_at_end=True,
    metric_for_best_model='f1',
    greater_is_better=True,
    logging_steps=10,
    report_to="none"  
)

def compute_metrics(p):
    pred, labels = p
    pred = np.argmax(pred, axis=1)
    f1 = f1_score(labels, pred)
    return {'f1': f1}

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_data,
    eval_dataset=test_data,
    compute_metrics=compute_metrics,
)

In [20]:
trainer.train()

  1%|          | 10/1417 [01:36<3:44:47,  9.59s/it]

{'loss': 0.0004, 'grad_norm': 0.183873251080513, 'learning_rate': 4.964714184897671e-05, 'epoch': 0.01}


  1%|▏         | 20/1417 [03:12<3:45:07,  9.67s/it]

{'loss': 0.0025, 'grad_norm': 0.003003583522513509, 'learning_rate': 4.929428369795343e-05, 'epoch': 0.01}


  2%|▏         | 30/1417 [04:50<3:47:37,  9.85s/it]

{'loss': 0.0003, 'grad_norm': 0.002846792805939913, 'learning_rate': 4.894142554693014e-05, 'epoch': 0.02}


  3%|▎         | 40/1417 [06:27<3:43:33,  9.74s/it]

{'loss': 0.0037, 'grad_norm': 0.0023869734723120928, 'learning_rate': 4.8588567395906846e-05, 'epoch': 0.03}


  4%|▎         | 50/1417 [08:04<3:41:33,  9.72s/it]

{'loss': 0.0369, 'grad_norm': 0.0015600974438712, 'learning_rate': 4.823570924488356e-05, 'epoch': 0.04}


  4%|▍         | 60/1417 [09:41<3:41:37,  9.80s/it]

{'loss': 0.1481, 'grad_norm': 2.105170726776123, 'learning_rate': 4.788285109386027e-05, 'epoch': 0.04}


  5%|▍         | 70/1417 [11:19<3:39:26,  9.77s/it]

{'loss': 0.0297, 'grad_norm': 0.01046033576130867, 'learning_rate': 4.752999294283698e-05, 'epoch': 0.05}


  6%|▌         | 80/1417 [12:55<3:35:00,  9.65s/it]

{'loss': 0.0309, 'grad_norm': 195.3047637939453, 'learning_rate': 4.7177134791813696e-05, 'epoch': 0.06}


  6%|▋         | 90/1417 [14:33<3:37:37,  9.84s/it]

{'loss': 0.0001, 'grad_norm': 0.0014970668125897646, 'learning_rate': 4.6824276640790405e-05, 'epoch': 0.06}


  7%|▋         | 100/1417 [16:10<3:34:12,  9.76s/it]

{'loss': 0.0001, 'grad_norm': 0.0030308058485388756, 'learning_rate': 4.6471418489767114e-05, 'epoch': 0.07}


  8%|▊         | 110/1417 [17:47<3:26:29,  9.48s/it]

{'loss': 0.089, 'grad_norm': 37.682552337646484, 'learning_rate': 4.6118560338743824e-05, 'epoch': 0.08}


  8%|▊         | 120/1417 [19:22<3:27:46,  9.61s/it]

{'loss': 0.0001, 'grad_norm': 0.0033756420016288757, 'learning_rate': 4.576570218772054e-05, 'epoch': 0.08}


  9%|▉         | 130/1417 [20:59<3:28:27,  9.72s/it]

{'loss': 0.004, 'grad_norm': 0.03756621107459068, 'learning_rate': 4.541284403669725e-05, 'epoch': 0.09}


 10%|▉         | 140/1417 [22:35<3:26:02,  9.68s/it]

{'loss': 0.0001, 'grad_norm': 0.001117009436711669, 'learning_rate': 4.505998588567396e-05, 'epoch': 0.1}


 11%|█         | 150/1417 [24:11<3:23:54,  9.66s/it]

{'loss': 0.0001, 'grad_norm': 0.0019954873714596033, 'learning_rate': 4.4707127734650674e-05, 'epoch': 0.11}


 11%|█▏        | 160/1417 [25:49<3:25:44,  9.82s/it]

{'loss': 0.0001, 'grad_norm': 0.0010505560785531998, 'learning_rate': 4.435426958362738e-05, 'epoch': 0.11}


 12%|█▏        | 170/1417 [27:26<3:20:05,  9.63s/it]

{'loss': 0.0003, 'grad_norm': 0.0008782704826444387, 'learning_rate': 4.400141143260409e-05, 'epoch': 0.12}


 13%|█▎        | 180/1417 [29:04<3:21:10,  9.76s/it]

{'loss': 0.0196, 'grad_norm': 0.0011006226995959878, 'learning_rate': 4.364855328158081e-05, 'epoch': 0.13}


 13%|█▎        | 190/1417 [30:41<3:17:22,  9.65s/it]

{'loss': 0.0, 'grad_norm': 0.0008598631247878075, 'learning_rate': 4.329569513055752e-05, 'epoch': 0.13}


 14%|█▍        | 200/1417 [32:18<3:15:46,  9.65s/it]

{'loss': 0.0, 'grad_norm': 0.0012475794646888971, 'learning_rate': 4.2942836979534227e-05, 'epoch': 0.14}


 15%|█▍        | 210/1417 [33:55<3:16:20,  9.76s/it]

{'loss': 0.0, 'grad_norm': 0.0011893515475094318, 'learning_rate': 4.258997882851094e-05, 'epoch': 0.15}


 16%|█▌        | 220/1417 [35:33<3:14:52,  9.77s/it]

{'loss': 0.0, 'grad_norm': 0.0007991756428964436, 'learning_rate': 4.223712067748765e-05, 'epoch': 0.16}


 16%|█▌        | 230/1417 [37:10<3:10:53,  9.65s/it]

{'loss': 0.0, 'grad_norm': 0.0008163451566360891, 'learning_rate': 4.188426252646436e-05, 'epoch': 0.16}


 17%|█▋        | 240/1417 [38:48<3:12:55,  9.83s/it]

{'loss': 0.0001, 'grad_norm': 0.0007358483271673322, 'learning_rate': 4.153140437544108e-05, 'epoch': 0.17}


 18%|█▊        | 250/1417 [40:26<3:11:28,  9.84s/it]

{'loss': 0.1105, 'grad_norm': 0.005847926717251539, 'learning_rate': 4.1178546224417786e-05, 'epoch': 0.18}


 18%|█▊        | 260/1417 [42:02<3:06:14,  9.66s/it]

{'loss': 0.0001, 'grad_norm': 0.0012906348565593362, 'learning_rate': 4.0825688073394495e-05, 'epoch': 0.18}


 19%|█▉        | 270/1417 [43:39<3:07:17,  9.80s/it]

{'loss': 0.0005, 'grad_norm': 0.0007570432499051094, 'learning_rate': 4.047282992237121e-05, 'epoch': 0.19}


 20%|█▉        | 280/1417 [45:16<3:03:25,  9.68s/it]

{'loss': 0.0863, 'grad_norm': 231.50747680664062, 'learning_rate': 4.011997177134792e-05, 'epoch': 0.2}


 20%|██        | 290/1417 [46:53<3:02:44,  9.73s/it]

{'loss': 0.0266, 'grad_norm': 0.03205006942152977, 'learning_rate': 3.976711362032463e-05, 'epoch': 0.2}


 21%|██        | 300/1417 [48:31<3:01:25,  9.74s/it]

{'loss': 0.1127, 'grad_norm': 0.0006565731018781662, 'learning_rate': 3.9414255469301345e-05, 'epoch': 0.21}


 22%|██▏       | 310/1417 [50:08<2:59:07,  9.71s/it]

{'loss': 0.0082, 'grad_norm': 0.0019602214451879263, 'learning_rate': 3.9061397318278055e-05, 'epoch': 0.22}


 23%|██▎       | 320/1417 [51:46<2:58:47,  9.78s/it]

{'loss': 0.1272, 'grad_norm': 88.70704650878906, 'learning_rate': 3.8708539167254764e-05, 'epoch': 0.23}


 23%|██▎       | 330/1417 [53:24<2:57:04,  9.77s/it]

{'loss': 0.0817, 'grad_norm': 0.000850665383040905, 'learning_rate': 3.835568101623147e-05, 'epoch': 0.23}


 24%|██▍       | 340/1417 [55:00<2:53:51,  9.69s/it]

{'loss': 0.2808, 'grad_norm': 0.010721120052039623, 'learning_rate': 3.800282286520819e-05, 'epoch': 0.24}


 25%|██▍       | 350/1417 [56:38<2:55:32,  9.87s/it]

{'loss': 0.0217, 'grad_norm': 1.1191174983978271, 'learning_rate': 3.76499647141849e-05, 'epoch': 0.25}


 25%|██▌       | 360/1417 [58:16<2:53:32,  9.85s/it]

{'loss': 0.0415, 'grad_norm': 0.0016102901427075267, 'learning_rate': 3.729710656316161e-05, 'epoch': 0.25}


 26%|██▌       | 370/1417 [59:53<2:51:13,  9.81s/it]

{'loss': 0.0268, 'grad_norm': 203.13587951660156, 'learning_rate': 3.694424841213832e-05, 'epoch': 0.26}


 27%|██▋       | 380/1417 [1:01:31<2:50:17,  9.85s/it]

{'loss': 0.0001, 'grad_norm': 0.03483875095844269, 'learning_rate': 3.659139026111503e-05, 'epoch': 0.27}


 28%|██▊       | 390/1417 [1:03:09<2:47:13,  9.77s/it]

{'loss': 0.0016, 'grad_norm': 6.355737209320068, 'learning_rate': 3.623853211009174e-05, 'epoch': 0.28}


 28%|██▊       | 400/1417 [1:04:46<2:44:03,  9.68s/it]

{'loss': 0.0077, 'grad_norm': 0.019637824967503548, 'learning_rate': 3.588567395906846e-05, 'epoch': 0.28}


 29%|██▉       | 410/1417 [1:06:23<2:43:26,  9.74s/it]

{'loss': 0.0097, 'grad_norm': 0.0012143392814323306, 'learning_rate': 3.553281580804517e-05, 'epoch': 0.29}


 30%|██▉       | 420/1417 [1:08:01<2:41:18,  9.71s/it]

{'loss': 0.0529, 'grad_norm': 0.004865053575485945, 'learning_rate': 3.5179957657021876e-05, 'epoch': 0.3}


 30%|███       | 430/1417 [1:09:38<2:40:47,  9.77s/it]

{'loss': 0.0003, 'grad_norm': 0.0016735513927415013, 'learning_rate': 3.482709950599859e-05, 'epoch': 0.3}


 31%|███       | 440/1417 [1:11:16<2:40:11,  9.84s/it]

{'loss': 0.0001, 'grad_norm': 0.0018639231566339731, 'learning_rate': 3.44742413549753e-05, 'epoch': 0.31}


 32%|███▏      | 450/1417 [1:12:52<2:36:58,  9.74s/it]

{'loss': 0.0002, 'grad_norm': 0.08259361237287521, 'learning_rate': 3.412138320395201e-05, 'epoch': 0.32}


 32%|███▏      | 460/1417 [1:14:30<2:36:53,  9.84s/it]

{'loss': 0.0001, 'grad_norm': 0.0038616396486759186, 'learning_rate': 3.3768525052928726e-05, 'epoch': 0.32}


 33%|███▎      | 470/1417 [1:16:08<2:34:31,  9.79s/it]

{'loss': 0.0015, 'grad_norm': 0.001820868463255465, 'learning_rate': 3.3415666901905435e-05, 'epoch': 0.33}


 34%|███▍      | 480/1417 [1:17:46<2:32:35,  9.77s/it]

{'loss': 0.0617, 'grad_norm': 0.0018961194436997175, 'learning_rate': 3.3062808750882144e-05, 'epoch': 0.34}


 35%|███▍      | 490/1417 [1:19:24<2:32:52,  9.90s/it]

{'loss': 0.0046, 'grad_norm': 0.0031769885681569576, 'learning_rate': 3.270995059985886e-05, 'epoch': 0.35}


 35%|███▌      | 500/1417 [1:21:01<2:27:29,  9.65s/it]

{'loss': 0.0001, 'grad_norm': 0.04862971976399422, 'learning_rate': 3.235709244883557e-05, 'epoch': 0.35}


 36%|███▌      | 510/1417 [1:22:38<2:27:52,  9.78s/it]

{'loss': 0.1194, 'grad_norm': 0.0010436129523441195, 'learning_rate': 3.200423429781228e-05, 'epoch': 0.36}


 37%|███▋      | 520/1417 [1:24:15<2:25:46,  9.75s/it]

{'loss': 0.0001, 'grad_norm': 0.020268721505999565, 'learning_rate': 3.1651376146788995e-05, 'epoch': 0.37}


 37%|███▋      | 530/1417 [1:25:53<2:24:28,  9.77s/it]

{'loss': 0.1199, 'grad_norm': 0.020738210529088974, 'learning_rate': 3.1298517995765704e-05, 'epoch': 0.37}


 38%|███▊      | 540/1417 [1:27:30<2:22:00,  9.72s/it]

{'loss': 0.0872, 'grad_norm': 0.6900027394294739, 'learning_rate': 3.094565984474241e-05, 'epoch': 0.38}


 39%|███▉      | 550/1417 [1:29:08<2:21:26,  9.79s/it]

{'loss': 0.1274, 'grad_norm': 0.0035546065773814917, 'learning_rate': 3.059280169371912e-05, 'epoch': 0.39}


 40%|███▉      | 560/1417 [1:30:45<2:19:35,  9.77s/it]

{'loss': 0.0072, 'grad_norm': 0.0027078797575086355, 'learning_rate': 3.0239943542695838e-05, 'epoch': 0.4}


 40%|████      | 570/1417 [1:32:22<2:17:09,  9.72s/it]

{'loss': 0.0001, 'grad_norm': 0.008040796965360641, 'learning_rate': 2.9887085391672547e-05, 'epoch': 0.4}


 41%|████      | 580/1417 [1:34:00<2:16:26,  9.78s/it]

{'loss': 0.0004, 'grad_norm': 0.002493085339665413, 'learning_rate': 2.953422724064926e-05, 'epoch': 0.41}


 42%|████▏     | 590/1417 [1:35:37<2:13:39,  9.70s/it]

{'loss': 0.0002, 'grad_norm': 0.0021040583960711956, 'learning_rate': 2.918136908962597e-05, 'epoch': 0.42}


 42%|████▏     | 600/1417 [1:37:14<2:11:42,  9.67s/it]

{'loss': 0.0045, 'grad_norm': 0.002769617596641183, 'learning_rate': 2.882851093860268e-05, 'epoch': 0.42}


 43%|████▎     | 610/1417 [1:38:51<2:09:22,  9.62s/it]

{'loss': 0.0001, 'grad_norm': 0.0018025306053459644, 'learning_rate': 2.8475652787579394e-05, 'epoch': 0.43}


 44%|████▍     | 620/1417 [1:40:27<2:04:37,  9.38s/it]

{'loss': 0.0771, 'grad_norm': 0.028904296457767487, 'learning_rate': 2.8122794636556103e-05, 'epoch': 0.44}


 44%|████▍     | 630/1417 [1:41:40<1:26:46,  6.62s/it]

{'loss': 0.0264, 'grad_norm': 0.05668500438332558, 'learning_rate': 2.7769936485532816e-05, 'epoch': 0.44}


 45%|████▌     | 640/1417 [1:42:43<1:22:28,  6.37s/it]

{'loss': 0.0135, 'grad_norm': 0.0017714414279907942, 'learning_rate': 2.741707833450953e-05, 'epoch': 0.45}


 46%|████▌     | 650/1417 [1:43:47<1:21:17,  6.36s/it]

{'loss': 0.0741, 'grad_norm': 17.578969955444336, 'learning_rate': 2.7064220183486238e-05, 'epoch': 0.46}


 47%|████▋     | 660/1417 [1:44:51<1:19:29,  6.30s/it]

{'loss': 0.0811, 'grad_norm': 0.0020628906786441803, 'learning_rate': 2.671136203246295e-05, 'epoch': 0.47}


 47%|████▋     | 670/1417 [1:45:55<1:19:34,  6.39s/it]

{'loss': 0.0013, 'grad_norm': 0.002803436014801264, 'learning_rate': 2.6358503881439663e-05, 'epoch': 0.47}


 48%|████▊     | 680/1417 [1:46:59<1:17:55,  6.34s/it]

{'loss': 0.0001, 'grad_norm': 0.02319454960525036, 'learning_rate': 2.6005645730416372e-05, 'epoch': 0.48}


 49%|████▊     | 690/1417 [1:48:03<1:18:41,  6.49s/it]

{'loss': 0.001, 'grad_norm': 1.1238090991973877, 'learning_rate': 2.5652787579393084e-05, 'epoch': 0.49}


 49%|████▉     | 700/1417 [1:49:07<1:15:49,  6.34s/it]

{'loss': 0.0001, 'grad_norm': 0.031862154603004456, 'learning_rate': 2.5299929428369794e-05, 'epoch': 0.49}


 50%|█████     | 710/1417 [1:50:10<1:14:28,  6.32s/it]

{'loss': 0.0001, 'grad_norm': 0.001878215349279344, 'learning_rate': 2.494707127734651e-05, 'epoch': 0.5}


 51%|█████     | 720/1417 [1:51:13<1:13:14,  6.30s/it]

{'loss': 0.1099, 'grad_norm': 0.009782470762729645, 'learning_rate': 2.4594213126323222e-05, 'epoch': 0.51}


 52%|█████▏    | 730/1417 [1:52:17<1:15:03,  6.56s/it]

{'loss': 0.0075, 'grad_norm': 0.003362894058227539, 'learning_rate': 2.424135497529993e-05, 'epoch': 0.52}


 52%|█████▏    | 740/1417 [1:53:21<1:12:17,  6.41s/it]

{'loss': 0.0135, 'grad_norm': 0.1389874964952469, 'learning_rate': 2.3888496824276644e-05, 'epoch': 0.52}


 53%|█████▎    | 750/1417 [1:54:25<1:10:50,  6.37s/it]

{'loss': 0.0001, 'grad_norm': 0.027956178411841393, 'learning_rate': 2.3535638673253353e-05, 'epoch': 0.53}


 54%|█████▎    | 760/1417 [1:55:28<1:09:13,  6.32s/it]

{'loss': 0.1189, 'grad_norm': 3.18697190284729, 'learning_rate': 2.3182780522230066e-05, 'epoch': 0.54}


 54%|█████▍    | 770/1417 [1:56:31<1:07:40,  6.28s/it]

{'loss': 0.0299, 'grad_norm': 0.002453879453241825, 'learning_rate': 2.2829922371206778e-05, 'epoch': 0.54}


 55%|█████▌    | 780/1417 [1:57:34<1:06:29,  6.26s/it]

{'loss': 0.0014, 'grad_norm': 0.012572539038956165, 'learning_rate': 2.2477064220183487e-05, 'epoch': 0.55}


 56%|█████▌    | 790/1417 [1:58:37<1:06:32,  6.37s/it]

{'loss': 0.0001, 'grad_norm': 0.0017969547770917416, 'learning_rate': 2.21242060691602e-05, 'epoch': 0.56}


 56%|█████▋    | 800/1417 [1:59:40<1:05:11,  6.34s/it]

{'loss': 0.0001, 'grad_norm': 0.0080570662394166, 'learning_rate': 2.1771347918136913e-05, 'epoch': 0.56}


 57%|█████▋    | 810/1417 [2:00:44<1:04:02,  6.33s/it]

{'loss': 0.1087, 'grad_norm': 0.0033995395060628653, 'learning_rate': 2.141848976711362e-05, 'epoch': 0.57}


 58%|█████▊    | 820/1417 [2:01:48<1:04:11,  6.45s/it]

{'loss': 0.0039, 'grad_norm': 0.039626456797122955, 'learning_rate': 2.1065631616090334e-05, 'epoch': 0.58}


 59%|█████▊    | 830/1417 [2:02:52<1:02:36,  6.40s/it]

{'loss': 0.0758, 'grad_norm': 0.0014985789312049747, 'learning_rate': 2.0712773465067047e-05, 'epoch': 0.59}


 59%|█████▉    | 840/1417 [2:03:55<1:01:05,  6.35s/it]

{'loss': 0.0001, 'grad_norm': 0.004698035307228565, 'learning_rate': 2.0359915314043756e-05, 'epoch': 0.59}


 60%|█████▉    | 850/1417 [2:04:59<1:00:21,  6.39s/it]

{'loss': 0.0001, 'grad_norm': 0.009124692529439926, 'learning_rate': 2.000705716302047e-05, 'epoch': 0.6}


 61%|██████    | 860/1417 [2:06:03<58:45,  6.33s/it]  

{'loss': 0.0444, 'grad_norm': 0.16336461901664734, 'learning_rate': 1.9654199011997178e-05, 'epoch': 0.61}


 61%|██████▏   | 870/1417 [2:07:06<57:59,  6.36s/it]

{'loss': 0.0001, 'grad_norm': 0.26460644602775574, 'learning_rate': 1.930134086097389e-05, 'epoch': 0.61}


 62%|██████▏   | 880/1417 [2:08:11<57:28,  6.42s/it]  

{'loss': 0.1427, 'grad_norm': 0.004610004834830761, 'learning_rate': 1.8948482709950603e-05, 'epoch': 0.62}


 63%|██████▎   | 890/1417 [2:09:14<56:19,  6.41s/it]

{'loss': 0.0001, 'grad_norm': 0.013547521084547043, 'learning_rate': 1.8595624558927312e-05, 'epoch': 0.63}


 64%|██████▎   | 900/1417 [2:10:17<54:27,  6.32s/it]

{'loss': 0.0174, 'grad_norm': 0.0015877580735832453, 'learning_rate': 1.8242766407904025e-05, 'epoch': 0.64}


 64%|██████▍   | 910/1417 [2:11:22<54:13,  6.42s/it]

{'loss': 0.1629, 'grad_norm': 0.0065276059322059155, 'learning_rate': 1.7889908256880737e-05, 'epoch': 0.64}


 65%|██████▍   | 920/1417 [2:12:26<52:56,  6.39s/it]

{'loss': 0.06, 'grad_norm': 41.63187789916992, 'learning_rate': 1.7537050105857446e-05, 'epoch': 0.65}


 66%|██████▌   | 930/1417 [2:13:30<51:27,  6.34s/it]

{'loss': 0.1119, 'grad_norm': 0.0015232076402753592, 'learning_rate': 1.718419195483416e-05, 'epoch': 0.66}


 66%|██████▋   | 940/1417 [2:14:33<50:33,  6.36s/it]

{'loss': 0.0012, 'grad_norm': 0.002064143307507038, 'learning_rate': 1.683133380381087e-05, 'epoch': 0.66}


 67%|██████▋   | 950/1417 [2:15:36<49:14,  6.33s/it]

{'loss': 0.0002, 'grad_norm': 0.002190003637224436, 'learning_rate': 1.647847565278758e-05, 'epoch': 0.67}


 68%|██████▊   | 960/1417 [2:16:40<48:33,  6.38s/it]

{'loss': 0.1903, 'grad_norm': 0.036221496760845184, 'learning_rate': 1.6125617501764293e-05, 'epoch': 0.68}


 68%|██████▊   | 970/1417 [2:17:43<46:34,  6.25s/it]

{'loss': 0.0004, 'grad_norm': 0.0021590113174170256, 'learning_rate': 1.5772759350741002e-05, 'epoch': 0.68}


 69%|██████▉   | 980/1417 [2:18:47<46:29,  6.38s/it]

{'loss': 0.1096, 'grad_norm': 0.0021397273521870375, 'learning_rate': 1.5419901199717715e-05, 'epoch': 0.69}


 70%|██████▉   | 990/1417 [2:19:50<45:08,  6.34s/it]

{'loss': 0.0005, 'grad_norm': 0.002098680241033435, 'learning_rate': 1.5067043048694426e-05, 'epoch': 0.7}


 71%|███████   | 1000/1417 [2:20:54<44:21,  6.38s/it]

{'loss': 0.0163, 'grad_norm': 0.01717069186270237, 'learning_rate': 1.4714184897671138e-05, 'epoch': 0.71}


 71%|███████▏  | 1010/1417 [2:21:57<42:54,  6.32s/it]

{'loss': 0.0004, 'grad_norm': 0.008355479687452316, 'learning_rate': 1.436132674664785e-05, 'epoch': 0.71}


 72%|███████▏  | 1020/1417 [2:23:01<42:13,  6.38s/it]

{'loss': 0.0507, 'grad_norm': 0.003616353962570429, 'learning_rate': 1.400846859562456e-05, 'epoch': 0.72}


 73%|███████▎  | 1030/1417 [2:24:05<41:21,  6.41s/it]

{'loss': 0.0909, 'grad_norm': 0.0076441518031060696, 'learning_rate': 1.3655610444601271e-05, 'epoch': 0.73}


 73%|███████▎  | 1040/1417 [2:25:08<39:53,  6.35s/it]

{'loss': 0.0003, 'grad_norm': 1.2078839540481567, 'learning_rate': 1.3302752293577984e-05, 'epoch': 0.73}


 74%|███████▍  | 1050/1417 [2:26:13<39:29,  6.46s/it]

{'loss': 0.0617, 'grad_norm': 0.002605792600661516, 'learning_rate': 1.2949894142554694e-05, 'epoch': 0.74}


 75%|███████▍  | 1060/1417 [2:27:16<37:23,  6.29s/it]

{'loss': 0.0003, 'grad_norm': 0.1996258795261383, 'learning_rate': 1.2597035991531405e-05, 'epoch': 0.75}


 76%|███████▌  | 1070/1417 [2:28:20<37:25,  6.47s/it]

{'loss': 0.0065, 'grad_norm': 0.009950995445251465, 'learning_rate': 1.2244177840508118e-05, 'epoch': 0.76}


 76%|███████▌  | 1080/1417 [2:29:23<35:35,  6.34s/it]

{'loss': 0.005, 'grad_norm': 0.06921245902776718, 'learning_rate': 1.1891319689484829e-05, 'epoch': 0.76}


 77%|███████▋  | 1090/1417 [2:30:27<34:38,  6.36s/it]

{'loss': 0.0006, 'grad_norm': 0.17747622728347778, 'learning_rate': 1.153846153846154e-05, 'epoch': 0.77}


 78%|███████▊  | 1100/1417 [2:31:30<33:11,  6.28s/it]

{'loss': 0.0014, 'grad_norm': 0.0026273224502801895, 'learning_rate': 1.118560338743825e-05, 'epoch': 0.78}


 78%|███████▊  | 1110/1417 [2:32:33<32:29,  6.35s/it]

{'loss': 0.1789, 'grad_norm': 65.45846557617188, 'learning_rate': 1.0832745236414963e-05, 'epoch': 0.78}


 79%|███████▉  | 1120/1417 [2:33:37<31:24,  6.35s/it]

{'loss': 0.1099, 'grad_norm': 0.02835816517472267, 'learning_rate': 1.0479887085391674e-05, 'epoch': 0.79}


 80%|███████▉  | 1130/1417 [2:34:41<30:41,  6.42s/it]

{'loss': 0.1183, 'grad_norm': 0.0031792428344488144, 'learning_rate': 1.0127028934368385e-05, 'epoch': 0.8}


 80%|████████  | 1140/1417 [2:35:44<29:08,  6.31s/it]

{'loss': 0.0011, 'grad_norm': 0.009639560244977474, 'learning_rate': 9.774170783345096e-06, 'epoch': 0.8}


 81%|████████  | 1150/1417 [2:36:49<28:40,  6.44s/it]

{'loss': 0.0761, 'grad_norm': 0.003373996587470174, 'learning_rate': 9.421312632321808e-06, 'epoch': 0.81}


 82%|████████▏ | 1160/1417 [2:37:53<27:17,  6.37s/it]

{'loss': 0.0021, 'grad_norm': 5.22794246673584, 'learning_rate': 9.068454481298519e-06, 'epoch': 0.82}


 83%|████████▎ | 1170/1417 [2:38:56<26:11,  6.36s/it]

{'loss': 0.1146, 'grad_norm': 0.012873332016170025, 'learning_rate': 8.71559633027523e-06, 'epoch': 0.83}


 83%|████████▎ | 1180/1417 [2:40:00<25:05,  6.35s/it]

{'loss': 0.0022, 'grad_norm': 0.0037884535267949104, 'learning_rate': 8.362738179251942e-06, 'epoch': 0.83}


 84%|████████▍ | 1190/1417 [2:41:03<23:45,  6.28s/it]

{'loss': 0.0002, 'grad_norm': 0.034649018198251724, 'learning_rate': 8.009880028228653e-06, 'epoch': 0.84}


 85%|████████▍ | 1200/1417 [2:42:06<23:02,  6.37s/it]

{'loss': 0.0853, 'grad_norm': 0.0030350396409630775, 'learning_rate': 7.657021877205364e-06, 'epoch': 0.85}


 85%|████████▌ | 1210/1417 [2:43:10<21:58,  6.37s/it]

{'loss': 0.0006, 'grad_norm': 0.002379028359428048, 'learning_rate': 7.304163726182076e-06, 'epoch': 0.85}


 86%|████████▌ | 1220/1417 [2:44:13<20:38,  6.29s/it]

{'loss': 0.0015, 'grad_norm': 0.0032265125773847103, 'learning_rate': 6.951305575158787e-06, 'epoch': 0.86}


 87%|████████▋ | 1230/1417 [2:45:16<19:53,  6.38s/it]

{'loss': 0.1085, 'grad_norm': 0.006363397464156151, 'learning_rate': 6.5984474241354985e-06, 'epoch': 0.87}


 88%|████████▊ | 1240/1417 [2:46:21<18:53,  6.41s/it]

{'loss': 0.1958, 'grad_norm': 13.09829044342041, 'learning_rate': 6.245589273112209e-06, 'epoch': 0.88}


 88%|████████▊ | 1250/1417 [2:47:25<17:39,  6.35s/it]

{'loss': 0.1441, 'grad_norm': 0.016881188377738, 'learning_rate': 5.892731122088921e-06, 'epoch': 0.88}


 89%|████████▉ | 1260/1417 [2:48:29<16:41,  6.38s/it]

{'loss': 0.0928, 'grad_norm': 0.002595249330624938, 'learning_rate': 5.539872971065632e-06, 'epoch': 0.89}


 90%|████████▉ | 1270/1417 [2:49:35<15:44,  6.43s/it]

{'loss': 0.0939, 'grad_norm': 0.09038098901510239, 'learning_rate': 5.187014820042344e-06, 'epoch': 0.9}


 90%|█████████ | 1280/1417 [2:50:38<14:29,  6.35s/it]

{'loss': 0.0004, 'grad_norm': 0.025240005925297737, 'learning_rate': 4.8341566690190545e-06, 'epoch': 0.9}


 91%|█████████ | 1290/1417 [2:51:42<13:25,  6.34s/it]

{'loss': 0.1121, 'grad_norm': 0.1508284956216812, 'learning_rate': 4.481298517995766e-06, 'epoch': 0.91}


 92%|█████████▏| 1300/1417 [2:52:45<12:35,  6.45s/it]

{'loss': 0.0002, 'grad_norm': 0.45173144340515137, 'learning_rate': 4.128440366972477e-06, 'epoch': 0.92}


 92%|█████████▏| 1310/1417 [2:53:50<11:29,  6.44s/it]

{'loss': 0.2695, 'grad_norm': 4.827598571777344, 'learning_rate': 3.775582215949189e-06, 'epoch': 0.92}


 93%|█████████▎| 1320/1417 [2:54:52<10:04,  6.23s/it]

{'loss': 0.1109, 'grad_norm': 0.0023101852275431156, 'learning_rate': 3.4227240649259e-06, 'epoch': 0.93}


 94%|█████████▍| 1330/1417 [2:55:56<09:11,  6.34s/it]

{'loss': 0.1117, 'grad_norm': 11.685067176818848, 'learning_rate': 3.0698659139026114e-06, 'epoch': 0.94}


 95%|█████████▍| 1340/1417 [2:57:00<08:15,  6.44s/it]

{'loss': 0.0151, 'grad_norm': 7.749962329864502, 'learning_rate': 2.7170077628793227e-06, 'epoch': 0.95}


 95%|█████████▌| 1350/1417 [2:58:03<07:03,  6.33s/it]

{'loss': 0.0152, 'grad_norm': 0.0023299597669392824, 'learning_rate': 2.364149611856034e-06, 'epoch': 0.95}


 96%|█████████▌| 1360/1417 [2:59:06<05:58,  6.29s/it]

{'loss': 0.1414, 'grad_norm': 4.555029392242432, 'learning_rate': 2.0112914608327453e-06, 'epoch': 0.96}


 97%|█████████▋| 1370/1417 [3:00:10<04:58,  6.36s/it]

{'loss': 0.0073, 'grad_norm': 0.9464525580406189, 'learning_rate': 1.6584333098094568e-06, 'epoch': 0.97}


 97%|█████████▋| 1380/1417 [3:01:13<03:54,  6.34s/it]

{'loss': 0.1921, 'grad_norm': 0.007637963630259037, 'learning_rate': 1.305575158786168e-06, 'epoch': 0.97}


 98%|█████████▊| 1390/1417 [3:02:17<02:52,  6.37s/it]

{'loss': 0.0028, 'grad_norm': 0.005370251834392548, 'learning_rate': 9.527170077628794e-07, 'epoch': 0.98}


 99%|█████████▉| 1400/1417 [3:03:20<01:46,  6.28s/it]

{'loss': 0.0015, 'grad_norm': 12.213142395019531, 'learning_rate': 5.998588567395907e-07, 'epoch': 0.99}


100%|█████████▉| 1410/1417 [3:04:23<00:44,  6.37s/it]

{'loss': 0.0008, 'grad_norm': 3.250744104385376, 'learning_rate': 2.470007057163021e-07, 'epoch': 1.0}


100%|██████████| 1417/1417 [3:05:03<00:00,  4.82s/it]
100%|██████████| 1417/1417 [3:26:46<00:00,  4.82s/it]

{'eval_loss': 0.8440388441085815, 'eval_f1': 0.8728813559322034, 'eval_runtime': 1303.3926, 'eval_samples_per_second': 2.174, 'eval_steps_per_second': 0.272, 'epoch': 1.0}


100%|██████████| 1417/1417 [3:26:48<00:00,  8.76s/it]

{'train_runtime': 12408.1306, 'train_samples_per_second': 0.913, 'train_steps_per_second': 0.114, 'train_loss': 0.042522864496508245, 'epoch': 1.0}





TrainOutput(global_step=1417, training_loss=0.042522864496508245, metrics={'train_runtime': 12408.1306, 'train_samples_per_second': 0.913, 'train_steps_per_second': 0.114, 'total_flos': 83542390831104.0, 'train_loss': 0.042522864496508245, 'epoch': 1.0})

In [21]:
eval_result = trainer.evaluate()
f1_rubert_tiny2 = eval_result['eval_f1']
print(f'F1-score для rubert-tiny2: {f1_rubert_tiny2:.4f}')


100%|██████████| 355/355 [21:44<00:00,  3.68s/it]

F1-score для rubert-tiny2: 0.8729





In [22]:
model.save_pretrained('./models/my_model_v3')