## Install necessary packages

### For the simplest setup, run this notebook on Google Colab

In [None]:
%%capture
# %%capture (which must remain the first line of the cell) hides the installer output.

# NOTE(review): flair is installed without a version pin, so a later re-run may pull a
# different release than the one that produced the outputs below — consider pinning.
!pip install flair

## Mount Google Drive and load data

In [None]:
# Colab-only step: mount Google Drive at /content/drive so that the data files and
# the checkpoint directory configured in the next cell are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Experiment configuration and raw data loading.
data_path = "/content/drive/MyDrive/"
exp_id = "presto-german-160623-10"
save_dir = "/content/drive/MyDrive/presto-exp-checkpoints/" + exp_id

# Transformer checkpoint used for the word embeddings. This cell used to assign
# 'xlm-roberta-base' and then immediately overwrite it, so only the value below
# ever took effect; the dead assignment has been removed.
# NOTE(review): the experiment id and data files are German, while MuRIL is
# presumably targeted at Indian languages — confirm this is the intended checkpoint.
model_name = 'google/muril-base-cased'

# Context managers guarantee the file handles are closed, and the explicit
# encoding keeps non-ASCII characters (e.g. umlauts) independent of the locale.
# readlines() keeps the trailing newline on every entry, as before.
with open(data_path + "data_de.dis", 'r', encoding='utf-8') as text_file:
    lines = text_file.readlines()
with open(data_path + "data_de.labels", 'r', encoding='utf-8') as label_file:
    labels = label_file.readlines()

# Every sentence needs exactly one label row; fail early instead of silently
# mis-pairing them further down.
if len(lines) != len(labels):
    raise ValueError(
        "data_de.dis has %d lines but data_de.labels has %d" % (len(lines), len(labels))
    )

In [None]:
# Peek at the first five raw sentences and their label rows.
for preview in (lines, labels):
    print(preview[:5])

['zeige mir angebote zeigen\n', 'pausiere mein mein lauftraining\n', 'meine meine mitfahrgelegenheit absagen\n', 'pause pausiere das laufen\n', 'beende beende meinen workout\n']
['1 0 0 0\n', '0 1 0 0\n', '1 0 0 0\n', '1 0 0 0\n', '1 0 0 0\n']


In [None]:
# Show the first sentence without its last character (the trailing "\n" visible in the preview above).
lines[0][:-1]

'zeige mir angebote zeigen'

## Prepare files for Train-Valid-Test Split

In [None]:
import random

# Fixed seed so that "Restart & Run All" reproduces exactly the same split.
SPLIT_SEED = 42

if len(lines) != len(labels):
    raise ValueError(
        "got %d sentences but %d label rows" % (len(lines), len(labels))
    )

# Shuffle sentences and label rows together so the pairs stay aligned.
# A private Random instance leaves the global RNG state untouched, and `lines` /
# `labels` themselves are not modified, so re-running this cell yields the same split.
paired = list(zip(lines, labels))
random.Random(SPLIT_SEED).shuffle(paired)

shuffled_lines = [sentence for sentence, _ in paired]
shuffled_labels = [tag_row for _, tag_row in paired]

# 80% train / 10% validation / 10% test; the boundaries are computed once.
train_end = int(0.8 * len(paired))
val_end = int(0.9 * len(paired))

train_lines, train_labels = shuffled_lines[:train_end], shuffled_labels[:train_end]
val_lines, val_labels = shuffled_lines[train_end:val_end], shuffled_labels[train_end:val_end]
test_lines, test_labels = shuffled_lines[val_end:], shuffled_labels[val_end:]

## Data Processing for Token Classification

In [None]:
import pandas as pd


def build_token_frame(sentences, tag_rows):
    """Flatten aligned sentences and label rows into a one-token-per-row DataFrame.

    Args:
        sentences: iterable of whitespace-separated sentences (a trailing newline is fine).
        tag_rows: iterable of whitespace-separated tags, one tag per word of the
            sentence at the same position.

    Returns:
        DataFrame with columns 'Sentence #', 'Word' and 'Tag'. 'Sentence #' holds
        "Sentence: <n>" (1-based position in ``sentences``) on the first token of
        each sentence and None on the remaining tokens.

    Raises:
        ValueError: if the two inputs differ in length, or a sentence and its tag
            row contain a different number of items.
    """
    sentences = list(sentences)
    tag_rows = list(tag_rows)
    if len(sentences) != len(tag_rows):
        raise ValueError(
            "got %d sentences but %d tag rows" % (len(sentences), len(tag_rows))
        )

    sentence_col, word_col, tag_col = [], [], []
    for position, (sentence, tag_row) in enumerate(zip(sentences, tag_rows), start=1):
        # split() drops the trailing newline (if any) and never yields empty tokens,
        # unlike slicing off the last character and splitting on a single space.
        words = sentence.split()
        tags = tag_row.split()
        if len(words) != len(tags):
            raise ValueError(
                "sentence %d has %d words but %d tags" % (position, len(words), len(tags))
            )
        if not words:
            # A blank line contributes no tokens.
            continue
        sentence_col.append("Sentence: " + str(position))
        sentence_col.extend([None] * (len(words) - 1))
        word_col.extend(words)
        tag_col.extend(tags)

    return pd.DataFrame(
        {"Sentence #": sentence_col, "Word": word_col, "Tag": tag_col}
    )


df_train = build_token_frame(train_lines, train_labels)
df_val = build_token_frame(val_lines, val_labels)
df_test = build_token_frame(test_lines, test_labels)

In [None]:
# First ten token rows of the validation frame.
df_val.head(10)

Unnamed: 0,Sentence #,Word,Tag
0,Sentence: 1,mhmm,1
1,,das,0
2,,taxi,0
3,,kann,0
4,,weg,0
5,Sentence: 2,zahle,0
6,,die,0
7,,rechnung,0
8,,mit,1
9,,äh,1


In [None]:
import os
# Directory that receives the column-format train/val/test files written in the
# next cell; exist_ok=True lets this cell be re-run without raising.
os.makedirs("data_dir",exist_ok = True)

In [None]:
import pandas as pd


def write_column_file(frame, path):
    """Write a token frame as a two-column text file: "<word> <tag>" per line.

    A blank line is emitted before every row that starts a new sentence (a row
    whose 'Sentence #' value is not missing), except before the very first row.

    Args:
        frame: DataFrame with 'Sentence #', 'Word' and 'Tag' columns.
        path: destination file path; an existing file is overwritten.
    """
    with open(path, 'w', encoding='utf-8') as write_file:
        # Iterate the columns positionally instead of doing a label lookup per row.
        rows = zip(frame['Sentence #'], frame['Word'], frame['Tag'])
        for position, (sentence_id, word, tag) in enumerate(rows):
            # pd.notna treats both None and NaN as "no new sentence"; a plain
            # `!= None` comparison would start a sentence on every NaN row.
            if position != 0 and pd.notna(sentence_id):
                write_file.write("\n")
            write_file.write(f"{word} {tag}\n")


write_column_file(df_train, "data_dir/train.txt")
write_column_file(df_val, "data_dir/val.txt")
write_column_file(df_test, "data_dir/test.txt")


In [None]:
from flair.data import Corpus
from flair.datasets import ColumnCorpus

# Column layout of the data files: column 0 is the token text, column 1 the 'dc' label.
columns = {0: 'text', 1: 'dc'}

# Folder containing the three split files.
data_folder = 'data_dir/'

# Build the corpus from the train / dev / test files.
corpus: Corpus = ColumnCorpus(
    data_folder,
    columns,
    train_file='train.txt',
    test_file='test.txt',
    dev_file='val.txt',
)

2023-06-16 20:30:49,132 Reading data from data_dir
2023-06-16 20:30:49,133 Train: data_dir/train.txt
2023-06-16 20:30:49,134 Dev: data_dir/val.txt
2023-06-16 20:30:49,136 Test: data_dir/test.txt


In [None]:
# Size of the training split of the corpus (read from data_dir/train.txt).
print(len(corpus.train))

2472


In [None]:
# Name of the label column (column 1 of the data files) that the tagger predicts.
label_type = 'dc'
# Build the label dictionary from the corpus.
# NOTE(review): an earlier draft called corpus.make_tag_dictionary(tag_type=...) and
# printed the result; that call is not executed — presumably superseded by the call below.

label_dict = corpus.make_label_dictionary(label_type=label_type, add_unk=False)
print(label_dict)

2023-06-16 20:30:49,667 Computing label dictionary. Progress:


2472it [00:00, 36168.14it/s]

2023-06-16 20:30:49,778 Dictionary created for label 'dc' with 2 values: 0 (seen 13943 times), 1 (seen 3861 times)
Dictionary with 2 tags: 0, 1





## Define Model

In [None]:
from flair.embeddings import TransformerWordEmbeddings
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer

# Word embeddings taken from the transformer checkpoint named by model_name
# (set in the configuration cell). The arguments request the last layer ("-1"),
# "first" subtoken pooling, no fine-tuning and use_context=True.
# NOTE(review): fine_tune=False presumably means the transformer weights are not
# updated during training, so only the layers added by the tagger would learn —
# confirm this is intended.
embeddings = TransformerWordEmbeddings(model=model_name,
                                       layers="-1",
                                       subtoken_pooling="first",
                                       fine_tune=False,
                                       use_context=True,
                                       )

Downloading (…)okenizer_config.json:   0%|          | 0.00/206 [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

Downloading (…)solve/main/vocab.txt:   0%|          | 0.00/3.16M [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/113 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/953M [00:00<?, ?B/s]

In [None]:
from flair.models import SequenceTagger

# Sequence tagger over the embeddings: RNN enabled, CRF disabled, hidden size 256,
# predicting the label type / dictionary prepared earlier.
tagger: SequenceTagger = SequenceTagger(
    hidden_size=256,
    embeddings=embeddings,
    tag_dictionary=label_dict,
    tag_type=label_type,
    use_crf=False,
    use_rnn=True,
)
print(tagger)

def count_parameters(model):
    """Return the total element count of all parameters of ``model`` that require gradients.

    ``model`` must expose ``parameters()``; each yielded parameter must provide
    ``requires_grad`` and ``numel()``.
    """
    trainable_total = 0
    for param in model.parameters():
        if param.requires_grad:
            trainable_total += param.numel()
    return trainable_total
# Print the number of parameters of the tagger that require gradients.
print(count_parameters(tagger))

# Parameter-count notes (presumably totals recorded from runs with and without
# each component — TODO confirm which configurations produced them):
# 238150676 - 238150660 = 16 parameters attributed to the CRF
# 240250884 - 238150660 = 2100224 parameters attributed to the RNN

2023-06-16 20:32:44,619 SequenceTagger predicts: Dictionary with 4 tags: 0, 1, <START>, <STOP>
SequenceTagger(
  (embeddings): TransformerWordEmbeddings(
    (model): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(197286, 768)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0-11): 12 x BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout(p=0.1, inplace=False)
              )
              (output):

## Start Training

In [None]:
from flair.trainers import ModelTrainer

# Optimisation settings for this run.
LEARNING_RATE = 0.1
MINI_BATCH_SIZE = 16
MAX_EPOCHS = 150

# Train the tagger on the corpus; save_dir is passed as the output location.
trainer: ModelTrainer = ModelTrainer(tagger, corpus)
trainer.train(
    save_dir,
    learning_rate=LEARNING_RATE,
    mini_batch_size=MINI_BATCH_SIZE,
    max_epochs=MAX_EPOCHS,
)

2023-06-16 13:24:07,827 ----------------------------------------------------------------------------------------------------
2023-06-16 13:24:07,833 Model: "SequenceTagger(
  (embeddings): TransformerWordEmbeddings(
    (model): BertModel(
      (embeddings): BertEmbeddings(
        (word_embeddings): Embedding(197286, 768)
        (position_embeddings): Embedding(512, 768)
        (token_type_embeddings): Embedding(2, 768)
        (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
        (dropout): Dropout(p=0.1, inplace=False)
      )
      (encoder): BertEncoder(
        (layer): ModuleList(
          (0-11): 12 x BertLayer(
            (attention): BertAttention(
              (self): BertSelfAttention(
                (query): Linear(in_features=768, out_features=768, bias=True)
                (key): Linear(in_features=768, out_features=768, bias=True)
                (value): Linear(in_features=768, out_features=768, bias=True)
                (dropout): Dropout

100%|██████████| 20/20 [00:02<00:00,  8.42it/s]

2023-06-16 13:24:28,932 Evaluating as a multi-label problem: False





2023-06-16 13:24:28,961 DEV : loss 0.513359546661377 - f1-score (micro avg)  0.7865
2023-06-16 13:24:28,975 BAD EPOCHS (no improvement): 0
2023-06-16 13:24:28,980 saving best model
2023-06-16 13:24:32,524 ----------------------------------------------------------------------------------------------------
2023-06-16 13:24:32,707 epoch 2 - iter 15/155 - loss 0.49744385 - time (sec): 0.18 - samples/sec: 9708.14 - lr: 0.100000
2023-06-16 13:24:32,897 epoch 2 - iter 30/155 - loss 0.50454125 - time (sec): 0.37 - samples/sec: 9332.42 - lr: 0.100000
2023-06-16 13:24:33,083 epoch 2 - iter 45/155 - loss 0.50654265 - time (sec): 0.56 - samples/sec: 9388.00 - lr: 0.100000
2023-06-16 13:24:33,270 epoch 2 - iter 60/155 - loss 0.51345044 - time (sec): 0.74 - samples/sec: 9440.95 - lr: 0.100000
2023-06-16 13:24:33,443 epoch 2 - iter 75/155 - loss 0.51694891 - time (sec): 0.92 - samples/sec: 9508.34 - lr: 0.100000
2023-06-16 13:24:33,617 epoch 2 - iter 90/155 - loss 0.51892696 - time (sec): 1.09 - samp

100%|██████████| 20/20 [00:00<00:00, 132.30it/s]

2023-06-16 13:24:34,506 Evaluating as a multi-label problem: False
2023-06-16 13:24:34,537 DEV : loss 0.5047157406806946 - f1-score (micro avg)  0.7865





2023-06-16 13:24:34,552 BAD EPOCHS (no improvement): 0
2023-06-16 13:24:34,560 ----------------------------------------------------------------------------------------------------
2023-06-16 13:24:34,734 epoch 3 - iter 15/155 - loss 0.51730011 - time (sec): 0.17 - samples/sec: 9870.88 - lr: 0.100000
2023-06-16 13:24:34,895 epoch 3 - iter 30/155 - loss 0.52362231 - time (sec): 0.33 - samples/sec: 10422.43 - lr: 0.100000
2023-06-16 13:24:35,051 epoch 3 - iter 45/155 - loss 0.52556077 - time (sec): 0.49 - samples/sec: 10677.96 - lr: 0.100000
2023-06-16 13:24:35,219 epoch 3 - iter 60/155 - loss 0.52274327 - time (sec): 0.66 - samples/sec: 10664.87 - lr: 0.100000
2023-06-16 13:24:35,378 epoch 3 - iter 75/155 - loss 0.51810688 - time (sec): 0.82 - samples/sec: 10686.36 - lr: 0.100000
2023-06-16 13:24:35,531 epoch 3 - iter 90/155 - loss 0.51494782 - time (sec): 0.97 - samples/sec: 10765.46 - lr: 0.100000
2023-06-16 13:24:35,690 epoch 3 - iter 105/155 - loss 0.51660641 - time (sec): 1.13 - sam

100%|██████████| 20/20 [00:00<00:00, 118.91it/s]

2023-06-16 13:24:36,416 Evaluating as a multi-label problem: False





2023-06-16 13:24:36,442 DEV : loss 0.510646402835846 - f1-score (micro avg)  0.7865
2023-06-16 13:24:36,453 BAD EPOCHS (no improvement): 1
2023-06-16 13:24:36,457 ----------------------------------------------------------------------------------------------------
2023-06-16 13:24:36,626 epoch 4 - iter 15/155 - loss 0.51925576 - time (sec): 0.17 - samples/sec: 10422.32 - lr: 0.100000
2023-06-16 13:24:36,793 epoch 4 - iter 30/155 - loss 0.52579217 - time (sec): 0.33 - samples/sec: 10379.99 - lr: 0.100000
2023-06-16 13:24:36,964 epoch 4 - iter 45/155 - loss 0.52382372 - time (sec): 0.51 - samples/sec: 10378.68 - lr: 0.100000
2023-06-16 13:24:37,136 epoch 4 - iter 60/155 - loss 0.52140164 - time (sec): 0.68 - samples/sec: 10376.00 - lr: 0.100000
2023-06-16 13:24:37,296 epoch 4 - iter 75/155 - loss 0.51368034 - time (sec): 0.84 - samples/sec: 10565.29 - lr: 0.100000
2023-06-16 13:24:37,454 epoch 4 - iter 90/155 - loss 0.51544110 - time (sec): 1.00 - samples/sec: 10558.34 - lr: 0.100000
2023

100%|██████████| 20/20 [00:00<00:00, 36.03it/s]

2023-06-16 13:24:38,726 Evaluating as a multi-label problem: False
2023-06-16 13:24:38,753 DEV : loss 0.49383673071861267 - f1-score (micro avg)  0.7865
2023-06-16 13:24:38,766 BAD EPOCHS (no improvement): 0
2023-06-16 13:24:38,770 ----------------------------------------------------------------------------------------------------





2023-06-16 13:24:38,932 epoch 5 - iter 15/155 - loss 0.49239876 - time (sec): 0.16 - samples/sec: 11284.10 - lr: 0.100000
2023-06-16 13:24:39,094 epoch 5 - iter 30/155 - loss 0.48992833 - time (sec): 0.32 - samples/sec: 10886.29 - lr: 0.100000
2023-06-16 13:24:39,261 epoch 5 - iter 45/155 - loss 0.50019822 - time (sec): 0.49 - samples/sec: 10760.07 - lr: 0.100000
2023-06-16 13:24:39,485 epoch 5 - iter 60/155 - loss 0.50273002 - time (sec): 0.71 - samples/sec: 9837.94 - lr: 0.100000
2023-06-16 13:24:39,689 epoch 5 - iter 75/155 - loss 0.50553417 - time (sec): 0.92 - samples/sec: 9552.26 - lr: 0.100000
2023-06-16 13:24:39,868 epoch 5 - iter 90/155 - loss 0.50851266 - time (sec): 1.10 - samples/sec: 9585.64 - lr: 0.100000
2023-06-16 13:24:40,036 epoch 5 - iter 105/155 - loss 0.50776487 - time (sec): 1.26 - samples/sec: 9664.94 - lr: 0.100000
2023-06-16 13:24:40,212 epoch 5 - iter 120/155 - loss 0.50806906 - time (sec): 1.44 - samples/sec: 9665.18 - lr: 0.100000
2023-06-16 13:24:40,393 epo

100%|██████████| 20/20 [00:00<00:00, 107.26it/s]

2023-06-16 13:24:40,833 Evaluating as a multi-label problem: False





2023-06-16 13:24:40,862 DEV : loss 0.5003040432929993 - f1-score (micro avg)  0.7865
2023-06-16 13:24:40,875 BAD EPOCHS (no improvement): 1
2023-06-16 13:24:40,880 ----------------------------------------------------------------------------------------------------
2023-06-16 13:24:41,072 epoch 6 - iter 15/155 - loss 0.50761403 - time (sec): 0.19 - samples/sec: 9196.39 - lr: 0.100000
2023-06-16 13:24:41,261 epoch 6 - iter 30/155 - loss 0.49733251 - time (sec): 0.38 - samples/sec: 9169.35 - lr: 0.100000
2023-06-16 13:24:41,433 epoch 6 - iter 45/155 - loss 0.50069290 - time (sec): 0.55 - samples/sec: 9562.75 - lr: 0.100000
2023-06-16 13:24:41,631 epoch 6 - iter 60/155 - loss 0.50152963 - time (sec): 0.75 - samples/sec: 9390.53 - lr: 0.100000
2023-06-16 13:24:41,812 epoch 6 - iter 75/155 - loss 0.50442001 - time (sec): 0.93 - samples/sec: 9407.28 - lr: 0.100000
2023-06-16 13:24:41,986 epoch 6 - iter 90/155 - loss 0.50448300 - time (sec): 1.10 - samples/sec: 9499.04 - lr: 0.100000
2023-06-1

100%|██████████| 20/20 [00:00<00:00, 112.56it/s]


2023-06-16 13:24:42,979 Evaluating as a multi-label problem: False
2023-06-16 13:24:43,007 DEV : loss 0.49548476934432983 - f1-score (micro avg)  0.7865
2023-06-16 13:24:43,018 BAD EPOCHS (no improvement): 2
2023-06-16 13:24:43,023 ----------------------------------------------------------------------------------------------------
2023-06-16 13:24:43,185 epoch 7 - iter 15/155 - loss 0.50355094 - time (sec): 0.16 - samples/sec: 10567.79 - lr: 0.100000
2023-06-16 13:24:43,363 epoch 7 - iter 30/155 - loss 0.49064072 - time (sec): 0.34 - samples/sec: 10233.76 - lr: 0.100000
2023-06-16 13:24:43,552 epoch 7 - iter 45/155 - loss 0.49530962 - time (sec): 0.53 - samples/sec: 9907.33 - lr: 0.100000
2023-06-16 13:24:43,764 epoch 7 - iter 60/155 - loss 0.49956612 - time (sec): 0.74 - samples/sec: 9366.50 - lr: 0.100000
2023-06-16 13:24:43,949 epoch 7 - iter 75/155 - loss 0.50024889 - time (sec): 0.92 - samples/sec: 9347.40 - lr: 0.100000
2023-06-16 13:24:44,158 epoch 7 - iter 90/155 - loss 0.50648

100%|██████████| 20/20 [00:00<00:00, 74.08it/s]

2023-06-16 13:24:45,362 Evaluating as a multi-label problem: False
2023-06-16 13:24:45,388 DEV : loss 0.4903302788734436 - f1-score (micro avg)  0.7865





2023-06-16 13:24:45,402 BAD EPOCHS (no improvement): 0
2023-06-16 13:24:45,407 ----------------------------------------------------------------------------------------------------
2023-06-16 13:24:45,637 epoch 8 - iter 15/155 - loss 0.50377273 - time (sec): 0.23 - samples/sec: 7692.11 - lr: 0.100000
2023-06-16 13:24:45,841 epoch 8 - iter 30/155 - loss 0.50421239 - time (sec): 0.43 - samples/sec: 8176.63 - lr: 0.100000
2023-06-16 13:24:46,055 epoch 8 - iter 45/155 - loss 0.50069103 - time (sec): 0.65 - samples/sec: 8218.07 - lr: 0.100000
2023-06-16 13:24:46,262 epoch 8 - iter 60/155 - loss 0.50204351 - time (sec): 0.85 - samples/sec: 8279.86 - lr: 0.100000
2023-06-16 13:24:46,493 epoch 8 - iter 75/155 - loss 0.50522178 - time (sec): 1.08 - samples/sec: 8143.54 - lr: 0.100000
2023-06-16 13:24:46,688 epoch 8 - iter 90/155 - loss 0.50551322 - time (sec): 1.28 - samples/sec: 8197.09 - lr: 0.100000
2023-06-16 13:24:46,892 epoch 8 - iter 105/155 - loss 0.50966345 - time (sec): 1.48 - samples/

100%|██████████| 20/20 [00:00<00:00, 83.01it/s]

2023-06-16 13:24:47,837 Evaluating as a multi-label problem: False
2023-06-16 13:24:47,870 DEV : loss 0.4739281237125397 - f1-score (micro avg)  0.7865
2023-06-16 13:24:47,882 BAD EPOCHS (no improvement): 0





2023-06-16 13:24:47,890 ----------------------------------------------------------------------------------------------------
2023-06-16 13:24:48,095 epoch 9 - iter 15/155 - loss 0.51106749 - time (sec): 0.20 - samples/sec: 8846.69 - lr: 0.100000
2023-06-16 13:24:48,309 epoch 9 - iter 30/155 - loss 0.50853685 - time (sec): 0.42 - samples/sec: 8529.97 - lr: 0.100000
2023-06-16 13:24:48,498 epoch 9 - iter 45/155 - loss 0.50785846 - time (sec): 0.60 - samples/sec: 8837.25 - lr: 0.100000
2023-06-16 13:24:48,701 epoch 9 - iter 60/155 - loss 0.51367083 - time (sec): 0.81 - samples/sec: 8732.89 - lr: 0.100000
2023-06-16 13:24:48,881 epoch 9 - iter 75/155 - loss 0.50905732 - time (sec): 0.99 - samples/sec: 8932.93 - lr: 0.100000
2023-06-16 13:24:49,064 epoch 9 - iter 90/155 - loss 0.50747101 - time (sec): 1.17 - samples/sec: 9038.54 - lr: 0.100000
2023-06-16 13:24:49,241 epoch 9 - iter 105/155 - loss 0.50560003 - time (sec): 1.35 - samples/sec: 9128.36 - lr: 0.100000
2023-06-16 13:24:49,405 epo

100%|██████████| 20/20 [00:00<00:00, 107.20it/s]

2023-06-16 13:24:50,023 Evaluating as a multi-label problem: False





2023-06-16 13:24:50,049 DEV : loss 0.49135535955429077 - f1-score (micro avg)  0.7865
2023-06-16 13:24:50,061 BAD EPOCHS (no improvement): 1
2023-06-16 13:24:50,065 ----------------------------------------------------------------------------------------------------
2023-06-16 13:24:50,244 epoch 10 - iter 15/155 - loss 0.48883617 - time (sec): 0.18 - samples/sec: 9013.88 - lr: 0.100000
2023-06-16 13:24:50,413 epoch 10 - iter 30/155 - loss 0.50072200 - time (sec): 0.35 - samples/sec: 9797.55 - lr: 0.100000
2023-06-16 13:24:50,580 epoch 10 - iter 45/155 - loss 0.50428575 - time (sec): 0.51 - samples/sec: 9926.71 - lr: 0.100000
2023-06-16 13:24:50,757 epoch 10 - iter 60/155 - loss 0.51109671 - time (sec): 0.69 - samples/sec: 9875.90 - lr: 0.100000
2023-06-16 13:24:50,918 epoch 10 - iter 75/155 - loss 0.50942307 - time (sec): 0.85 - samples/sec: 10103.43 - lr: 0.100000
2023-06-16 13:24:51,098 epoch 10 - iter 90/155 - loss 0.50588723 - time (sec): 1.03 - samples/sec: 10073.12 - lr: 0.100000


100%|██████████| 20/20 [00:00<00:00, 120.85it/s]

2023-06-16 13:24:52,037 Evaluating as a multi-label problem: False





2023-06-16 13:24:52,066 DEV : loss 0.4716068208217621 - f1-score (micro avg)  0.7865
2023-06-16 13:24:52,079 BAD EPOCHS (no improvement): 0
2023-06-16 13:24:52,084 ----------------------------------------------------------------------------------------------------
2023-06-16 13:24:52,265 epoch 11 - iter 15/155 - loss 0.52641456 - time (sec): 0.18 - samples/sec: 9488.81 - lr: 0.100000
2023-06-16 13:24:52,434 epoch 11 - iter 30/155 - loss 0.50564945 - time (sec): 0.35 - samples/sec: 10030.14 - lr: 0.100000
2023-06-16 13:24:52,606 epoch 11 - iter 45/155 - loss 0.50178043 - time (sec): 0.52 - samples/sec: 10096.85 - lr: 0.100000
2023-06-16 13:24:52,778 epoch 11 - iter 60/155 - loss 0.49972215 - time (sec): 0.69 - samples/sec: 10032.04 - lr: 0.100000
2023-06-16 13:24:52,947 epoch 11 - iter 75/155 - loss 0.50054987 - time (sec): 0.86 - samples/sec: 10017.60 - lr: 0.100000
2023-06-16 13:24:53,127 epoch 11 - iter 90/155 - loss 0.50155182 - time (sec): 1.04 - samples/sec: 9955.00 - lr: 0.100000

100%|██████████| 20/20 [00:00<00:00, 112.00it/s]

2023-06-16 13:24:54,112 Evaluating as a multi-label problem: False





2023-06-16 13:24:54,143 DEV : loss 0.49658164381980896 - f1-score (micro avg)  0.7865
2023-06-16 13:24:54,154 BAD EPOCHS (no improvement): 1
2023-06-16 13:24:54,158 ----------------------------------------------------------------------------------------------------
2023-06-16 13:24:54,336 epoch 12 - iter 15/155 - loss 0.49023994 - time (sec): 0.18 - samples/sec: 9826.45 - lr: 0.100000
2023-06-16 13:24:54,515 epoch 12 - iter 30/155 - loss 0.49244009 - time (sec): 0.35 - samples/sec: 9821.38 - lr: 0.100000
2023-06-16 13:24:54,698 epoch 12 - iter 45/155 - loss 0.49030275 - time (sec): 0.54 - samples/sec: 9637.21 - lr: 0.100000
2023-06-16 13:24:54,875 epoch 12 - iter 60/155 - loss 0.49328226 - time (sec): 0.71 - samples/sec: 9714.73 - lr: 0.100000
2023-06-16 13:24:55,040 epoch 12 - iter 75/155 - loss 0.49644765 - time (sec): 0.88 - samples/sec: 9850.29 - lr: 0.100000
2023-06-16 13:24:55,230 epoch 12 - iter 90/155 - loss 0.49933559 - time (sec): 1.07 - samples/sec: 9763.61 - lr: 0.100000
20

100%|██████████| 20/20 [00:00<00:00, 115.24it/s]

2023-06-16 13:24:56,197 Evaluating as a multi-label problem: False





2023-06-16 13:24:56,227 DEV : loss 0.47278791666030884 - f1-score (micro avg)  0.7919
2023-06-16 13:24:56,242 BAD EPOCHS (no improvement): 0
2023-06-16 13:24:56,247 saving best model
2023-06-16 13:25:01,336 ----------------------------------------------------------------------------------------------------
2023-06-16 13:25:01,518 epoch 13 - iter 15/155 - loss 0.50308164 - time (sec): 0.18 - samples/sec: 9598.58 - lr: 0.100000
2023-06-16 13:25:01,709 epoch 13 - iter 30/155 - loss 0.49512118 - time (sec): 0.37 - samples/sec: 9468.34 - lr: 0.100000
2023-06-16 13:25:01,895 epoch 13 - iter 45/155 - loss 0.49403323 - time (sec): 0.56 - samples/sec: 9487.15 - lr: 0.100000
2023-06-16 13:25:02,074 epoch 13 - iter 60/155 - loss 0.48798580 - time (sec): 0.74 - samples/sec: 9531.24 - lr: 0.100000
2023-06-16 13:25:02,250 epoch 13 - iter 75/155 - loss 0.49139645 - time (sec): 0.91 - samples/sec: 9554.15 - lr: 0.100000
2023-06-16 13:25:02,445 epoch 13 - iter 90/155 - loss 0.49207309 - time (sec): 1.1

100%|██████████| 20/20 [00:00<00:00, 121.74it/s]

2023-06-16 13:25:03,427 Evaluating as a multi-label problem: False
2023-06-16 13:25:03,451 DEV : loss 0.47078296542167664 - f1-score (micro avg)  0.787





2023-06-16 13:25:03,464 BAD EPOCHS (no improvement): 1
2023-06-16 13:25:03,468 ----------------------------------------------------------------------------------------------------
2023-06-16 13:25:03,636 epoch 14 - iter 15/155 - loss 0.48462971 - time (sec): 0.17 - samples/sec: 10484.49 - lr: 0.100000
2023-06-16 13:25:03,800 epoch 14 - iter 30/155 - loss 0.48785928 - time (sec): 0.33 - samples/sec: 10290.95 - lr: 0.100000
2023-06-16 13:25:03,955 epoch 14 - iter 45/155 - loss 0.48813463 - time (sec): 0.48 - samples/sec: 10702.96 - lr: 0.100000
2023-06-16 13:25:04,117 epoch 14 - iter 60/155 - loss 0.48560962 - time (sec): 0.65 - samples/sec: 10701.65 - lr: 0.100000
2023-06-16 13:25:04,276 epoch 14 - iter 75/155 - loss 0.48567965 - time (sec): 0.81 - samples/sec: 10648.21 - lr: 0.100000
2023-06-16 13:25:04,444 epoch 14 - iter 90/155 - loss 0.48487186 - time (sec): 0.97 - samples/sec: 10648.95 - lr: 0.100000
2023-06-16 13:25:04,602 epoch 14 - iter 105/155 - loss 0.48606164 - time (sec): 1.

100%|██████████| 20/20 [00:00<00:00, 124.81it/s]

2023-06-16 13:25:05,319 Evaluating as a multi-label problem: False
2023-06-16 13:25:05,345 DEV : loss 0.4879070222377777 - f1-score (micro avg)  0.8168





2023-06-16 13:25:05,357 BAD EPOCHS (no improvement): 0
2023-06-16 13:25:05,361 saving best model
2023-06-16 13:25:09,280 ----------------------------------------------------------------------------------------------------
2023-06-16 13:25:09,448 epoch 15 - iter 15/155 - loss 0.48906838 - time (sec): 0.16 - samples/sec: 10825.73 - lr: 0.100000
2023-06-16 13:25:09,613 epoch 15 - iter 30/155 - loss 0.48471497 - time (sec): 0.33 - samples/sec: 10814.23 - lr: 0.100000
2023-06-16 13:25:09,774 epoch 15 - iter 45/155 - loss 0.48563484 - time (sec): 0.49 - samples/sec: 10651.29 - lr: 0.100000
2023-06-16 13:25:09,943 epoch 15 - iter 60/155 - loss 0.48639349 - time (sec): 0.66 - samples/sec: 10595.72 - lr: 0.100000
2023-06-16 13:25:10,132 epoch 15 - iter 75/155 - loss 0.48797503 - time (sec): 0.85 - samples/sec: 10265.89 - lr: 0.100000
2023-06-16 13:25:10,440 epoch 15 - iter 90/155 - loss 0.49102832 - time (sec): 1.16 - samples/sec: 9032.35 - lr: 0.100000
2023-06-16 13:25:10,644 epoch 15 - iter 1

100%|██████████| 20/20 [00:00<00:00, 117.69it/s]

2023-06-16 13:25:11,599 Evaluating as a multi-label problem: False





2023-06-16 13:25:11,626 DEV : loss 0.44620633125305176 - f1-score (micro avg)  0.8091
2023-06-16 13:25:11,638 BAD EPOCHS (no improvement): 1
2023-06-16 13:25:11,642 ----------------------------------------------------------------------------------------------------
2023-06-16 13:25:11,804 epoch 16 - iter 15/155 - loss 0.48610794 - time (sec): 0.16 - samples/sec: 10501.25 - lr: 0.100000
2023-06-16 13:25:12,003 epoch 16 - iter 30/155 - loss 0.48493748 - time (sec): 0.36 - samples/sec: 9608.17 - lr: 0.100000
2023-06-16 13:25:12,265 epoch 16 - iter 45/155 - loss 0.48390473 - time (sec): 0.62 - samples/sec: 8364.35 - lr: 0.100000
2023-06-16 13:25:12,447 epoch 16 - iter 60/155 - loss 0.48425356 - time (sec): 0.80 - samples/sec: 8557.12 - lr: 0.100000
2023-06-16 13:25:12,611 epoch 16 - iter 75/155 - loss 0.48620467 - time (sec): 0.97 - samples/sec: 8941.33 - lr: 0.100000
2023-06-16 13:25:12,783 epoch 16 - iter 90/155 - loss 0.48448797 - time (sec): 1.14 - samples/sec: 9172.25 - lr: 0.100000
2

100%|██████████| 20/20 [00:00<00:00, 83.40it/s]

2023-06-16 13:25:13,775 Evaluating as a multi-label problem: False
2023-06-16 13:25:13,810 DEV : loss 0.45766204595565796 - f1-score (micro avg)  0.8087
2023-06-16 13:25:13,823 BAD EPOCHS (no improvement): 2





2023-06-16 13:25:14,074 ----------------------------------------------------------------------------------------------------
2023-06-16 13:25:14,251 epoch 17 - iter 15/155 - loss 0.48298177 - time (sec): 0.17 - samples/sec: 9944.89 - lr: 0.100000
2023-06-16 13:25:14,430 epoch 17 - iter 30/155 - loss 0.49389443 - time (sec): 0.35 - samples/sec: 9772.21 - lr: 0.100000
2023-06-16 13:25:14,598 epoch 17 - iter 45/155 - loss 0.48689371 - time (sec): 0.52 - samples/sec: 9936.17 - lr: 0.100000
2023-06-16 13:25:14,780 epoch 17 - iter 60/155 - loss 0.48660849 - time (sec): 0.70 - samples/sec: 9742.71 - lr: 0.100000
2023-06-16 13:25:14,962 epoch 17 - iter 75/155 - loss 0.48238775 - time (sec): 0.89 - samples/sec: 9725.28 - lr: 0.100000
2023-06-16 13:25:15,142 epoch 17 - iter 90/155 - loss 0.48143143 - time (sec): 1.07 - samples/sec: 9689.69 - lr: 0.100000
2023-06-16 13:25:15,330 epoch 17 - iter 105/155 - loss 0.47756465 - time (sec): 1.25 - samples/sec: 9672.92 - lr: 0.100000
2023-06-16 13:25:15,

100%|██████████| 20/20 [00:00<00:00, 106.72it/s]


2023-06-16 13:25:16,134 Evaluating as a multi-label problem: False
2023-06-16 13:25:16,159 DEV : loss 0.44940853118896484 - f1-score (micro avg)  0.8014
2023-06-16 13:25:16,173 BAD EPOCHS (no improvement): 3
2023-06-16 13:25:16,178 ----------------------------------------------------------------------------------------------------
2023-06-16 13:25:16,348 epoch 18 - iter 15/155 - loss 0.47511530 - time (sec): 0.17 - samples/sec: 10819.69 - lr: 0.100000
2023-06-16 13:25:16,526 epoch 18 - iter 30/155 - loss 0.47812950 - time (sec): 0.35 - samples/sec: 10369.31 - lr: 0.100000
2023-06-16 13:25:16,709 epoch 18 - iter 45/155 - loss 0.47148340 - time (sec): 0.53 - samples/sec: 10135.98 - lr: 0.100000
2023-06-16 13:25:16,890 epoch 18 - iter 60/155 - loss 0.47144975 - time (sec): 0.71 - samples/sec: 9999.45 - lr: 0.100000
2023-06-16 13:25:17,071 epoch 18 - iter 75/155 - loss 0.47091996 - time (sec): 0.89 - samples/sec: 9977.98 - lr: 0.100000
2023-06-16 13:25:17,271 epoch 18 - iter 90/155 - loss 

100%|██████████| 20/20 [00:00<00:00, 101.90it/s]


2023-06-16 13:25:18,424 Evaluating as a multi-label problem: False
2023-06-16 13:25:18,448 DEV : loss 0.44978994131088257 - f1-score (micro avg)  0.8087
2023-06-16 13:25:18,459 Epoch    18: reducing learning rate of group 0 to 5.0000e-02.
2023-06-16 13:25:18,461 BAD EPOCHS (no improvement): 4
2023-06-16 13:25:18,466 ----------------------------------------------------------------------------------------------------
2023-06-16 13:25:18,643 epoch 19 - iter 15/155 - loss 0.45916860 - time (sec): 0.18 - samples/sec: 9315.56 - lr: 0.050000
2023-06-16 13:25:18,841 epoch 19 - iter 30/155 - loss 0.46539803 - time (sec): 0.37 - samples/sec: 8996.67 - lr: 0.050000
2023-06-16 13:25:19,000 epoch 19 - iter 45/155 - loss 0.45824786 - time (sec): 0.53 - samples/sec: 9589.86 - lr: 0.050000
2023-06-16 13:25:19,194 epoch 19 - iter 60/155 - loss 0.46090010 - time (sec): 0.73 - samples/sec: 9490.13 - lr: 0.050000
2023-06-16 13:25:19,374 epoch 19 - iter 75/155 - loss 0.45764462 - time (sec): 0.91 - samples

100%|██████████| 20/20 [00:00<00:00, 107.29it/s]

2023-06-16 13:25:20,568 Evaluating as a multi-label problem: False





2023-06-16 13:25:20,593 DEV : loss 0.4410528242588043 - f1-score (micro avg)  0.8082
2023-06-16 13:25:20,606 BAD EPOCHS (no improvement): 1
2023-06-16 13:25:20,612 ----------------------------------------------------------------------------------------------------
2023-06-16 13:25:20,818 epoch 20 - iter 15/155 - loss 0.43872595 - time (sec): 0.20 - samples/sec: 8641.38 - lr: 0.050000
2023-06-16 13:25:20,998 epoch 20 - iter 30/155 - loss 0.43950511 - time (sec): 0.38 - samples/sec: 8870.54 - lr: 0.050000
2023-06-16 13:25:21,175 epoch 20 - iter 45/155 - loss 0.45175957 - time (sec): 0.56 - samples/sec: 9123.91 - lr: 0.050000
2023-06-16 13:25:21,354 epoch 20 - iter 60/155 - loss 0.44508797 - time (sec): 0.74 - samples/sec: 9313.82 - lr: 0.050000
2023-06-16 13:25:21,558 epoch 20 - iter 75/155 - loss 0.44686490 - time (sec): 0.95 - samples/sec: 9094.92 - lr: 0.050000
2023-06-16 13:25:21,741 epoch 20 - iter 90/155 - loss 0.44653060 - time (sec): 1.13 - samples/sec: 9233.93 - lr: 0.050000
202

100%|██████████| 20/20 [00:00<00:00, 107.97it/s]

2023-06-16 13:25:22,716 Evaluating as a multi-label problem: False





2023-06-16 13:25:22,745 DEV : loss 0.4361001253128052 - f1-score (micro avg)  0.8087
2023-06-16 13:25:22,758 BAD EPOCHS (no improvement): 2
2023-06-16 13:25:22,763 ----------------------------------------------------------------------------------------------------
2023-06-16 13:25:22,971 epoch 21 - iter 15/155 - loss 0.45638763 - time (sec): 0.21 - samples/sec: 8386.36 - lr: 0.050000
2023-06-16 13:25:23,150 epoch 21 - iter 30/155 - loss 0.44800068 - time (sec): 0.38 - samples/sec: 8953.66 - lr: 0.050000
2023-06-16 13:25:23,350 epoch 21 - iter 45/155 - loss 0.45421196 - time (sec): 0.58 - samples/sec: 8976.54 - lr: 0.050000
2023-06-16 13:25:23,536 epoch 21 - iter 60/155 - loss 0.45181776 - time (sec): 0.77 - samples/sec: 9085.93 - lr: 0.050000
2023-06-16 13:25:23,707 epoch 21 - iter 75/155 - loss 0.45246469 - time (sec): 0.94 - samples/sec: 9281.09 - lr: 0.050000
2023-06-16 13:25:23,903 epoch 21 - iter 90/155 - loss 0.45259829 - time (sec): 1.14 - samples/sec: 9192.14 - lr: 0.050000
202

100%|██████████| 20/20 [00:00<00:00, 112.29it/s]

2023-06-16 13:25:24,883 Evaluating as a multi-label problem: False





2023-06-16 13:25:24,917 DEV : loss 0.44146642088890076 - f1-score (micro avg)  0.8195
2023-06-16 13:25:24,931 BAD EPOCHS (no improvement): 0
2023-06-16 13:25:24,936 saving best model
2023-06-16 13:25:28,919 ----------------------------------------------------------------------------------------------------
2023-06-16 13:25:29,118 epoch 22 - iter 15/155 - loss 0.45064410 - time (sec): 0.20 - samples/sec: 9057.70 - lr: 0.050000
2023-06-16 13:25:29,301 epoch 22 - iter 30/155 - loss 0.45490216 - time (sec): 0.38 - samples/sec: 9290.03 - lr: 0.050000
2023-06-16 13:25:29,479 epoch 22 - iter 45/155 - loss 0.45582674 - time (sec): 0.56 - samples/sec: 9384.78 - lr: 0.050000
2023-06-16 13:25:29,647 epoch 22 - iter 60/155 - loss 0.45008536 - time (sec): 0.72 - samples/sec: 9623.98 - lr: 0.050000
2023-06-16 13:25:29,827 epoch 22 - iter 75/155 - loss 0.44467467 - time (sec): 0.91 - samples/sec: 9669.98 - lr: 0.050000
2023-06-16 13:25:30,000 epoch 22 - iter 90/155 - loss 0.44510824 - time (sec): 1.0

100%|██████████| 20/20 [00:00<00:00, 32.64it/s] 

2023-06-16 13:25:31,394 Evaluating as a multi-label problem: False
2023-06-16 13:25:31,422 DEV : loss 0.4248441159725189 - f1-score (micro avg)  0.8132
2023-06-16 13:25:31,437 BAD EPOCHS (no improvement): 1
2023-06-16 13:25:31,443 ----------------------------------------------------------------------------------------------------





2023-06-16 13:25:31,618 epoch 23 - iter 15/155 - loss 0.44175005 - time (sec): 0.17 - samples/sec: 10225.76 - lr: 0.050000
2023-06-16 13:25:31,800 epoch 23 - iter 30/155 - loss 0.43491044 - time (sec): 0.35 - samples/sec: 9730.44 - lr: 0.050000
2023-06-16 13:25:31,983 epoch 23 - iter 45/155 - loss 0.44561148 - time (sec): 0.54 - samples/sec: 9642.34 - lr: 0.050000
2023-06-16 13:25:32,164 epoch 23 - iter 60/155 - loss 0.44293879 - time (sec): 0.72 - samples/sec: 9689.36 - lr: 0.050000
2023-06-16 13:25:32,333 epoch 23 - iter 75/155 - loss 0.44366325 - time (sec): 0.89 - samples/sec: 9690.98 - lr: 0.050000
2023-06-16 13:25:32,498 epoch 23 - iter 90/155 - loss 0.44562779 - time (sec): 1.05 - samples/sec: 9837.82 - lr: 0.050000
2023-06-16 13:25:32,673 epoch 23 - iter 105/155 - loss 0.44452860 - time (sec): 1.23 - samples/sec: 9877.90 - lr: 0.050000
2023-06-16 13:25:32,832 epoch 23 - iter 120/155 - loss 0.44484960 - time (sec): 1.39 - samples/sec: 10016.17 - lr: 0.050000
2023-06-16 13:25:32,

100%|██████████| 20/20 [00:00<00:00, 123.99it/s]

2023-06-16 13:25:33,401 Evaluating as a multi-label problem: False
2023-06-16 13:25:33,424 DEV : loss 0.42817750573158264 - f1-score (micro avg)  0.8186





2023-06-16 13:25:33,439 BAD EPOCHS (no improvement): 2
2023-06-16 13:25:33,443 ----------------------------------------------------------------------------------------------------
2023-06-16 13:25:33,608 epoch 24 - iter 15/155 - loss 0.48322641 - time (sec): 0.16 - samples/sec: 10499.23 - lr: 0.050000
2023-06-16 13:25:33,764 epoch 24 - iter 30/155 - loss 0.46447297 - time (sec): 0.32 - samples/sec: 10623.65 - lr: 0.050000
2023-06-16 13:25:33,924 epoch 24 - iter 45/155 - loss 0.45408086 - time (sec): 0.48 - samples/sec: 10769.17 - lr: 0.050000
2023-06-16 13:25:34,101 epoch 24 - iter 60/155 - loss 0.45374426 - time (sec): 0.66 - samples/sec: 10468.34 - lr: 0.050000
2023-06-16 13:25:34,268 epoch 24 - iter 75/155 - loss 0.45194083 - time (sec): 0.82 - samples/sec: 10523.86 - lr: 0.050000
2023-06-16 13:25:34,435 epoch 24 - iter 90/155 - loss 0.45033464 - time (sec): 0.99 - samples/sec: 10480.40 - lr: 0.050000
2023-06-16 13:25:34,600 epoch 24 - iter 105/155 - loss 0.45122831 - time (sec): 1.

100%|██████████| 20/20 [00:00<00:00, 120.12it/s]

2023-06-16 13:25:35,335 Evaluating as a multi-label problem: False





2023-06-16 13:25:35,360 DEV : loss 0.4282483756542206 - f1-score (micro avg)  0.8128
2023-06-16 13:25:35,373 BAD EPOCHS (no improvement): 3
2023-06-16 13:25:35,377 ----------------------------------------------------------------------------------------------------
2023-06-16 13:25:35,534 epoch 25 - iter 15/155 - loss 0.44941858 - time (sec): 0.16 - samples/sec: 11227.93 - lr: 0.050000
2023-06-16 13:25:35,695 epoch 25 - iter 30/155 - loss 0.45533667 - time (sec): 0.32 - samples/sec: 11073.41 - lr: 0.050000
2023-06-16 13:25:35,858 epoch 25 - iter 45/155 - loss 0.45592549 - time (sec): 0.48 - samples/sec: 11004.02 - lr: 0.050000
2023-06-16 13:25:36,020 epoch 25 - iter 60/155 - loss 0.45177391 - time (sec): 0.64 - samples/sec: 10912.19 - lr: 0.050000
2023-06-16 13:25:36,183 epoch 25 - iter 75/155 - loss 0.45021410 - time (sec): 0.80 - samples/sec: 10893.27 - lr: 0.050000
2023-06-16 13:25:36,336 epoch 25 - iter 90/155 - loss 0.44904068 - time (sec): 0.96 - samples/sec: 10961.80 - lr: 0.0500

100%|██████████| 20/20 [00:00<00:00, 123.99it/s]

2023-06-16 13:25:37,199 Evaluating as a multi-label problem: False
2023-06-16 13:25:37,225 DEV : loss 0.4312303960323334 - f1-score (micro avg)  0.8105





2023-06-16 13:25:37,238 Epoch    25: reducing learning rate of group 0 to 2.5000e-02.
2023-06-16 13:25:37,240 BAD EPOCHS (no improvement): 4
2023-06-16 13:25:37,245 ----------------------------------------------------------------------------------------------------
2023-06-16 13:25:37,414 epoch 26 - iter 15/155 - loss 0.45122316 - time (sec): 0.17 - samples/sec: 10717.40 - lr: 0.025000
2023-06-16 13:25:37,570 epoch 26 - iter 30/155 - loss 0.44956237 - time (sec): 0.32 - samples/sec: 10944.48 - lr: 0.025000
2023-06-16 13:25:37,724 epoch 26 - iter 45/155 - loss 0.44584382 - time (sec): 0.48 - samples/sec: 11008.42 - lr: 0.025000
2023-06-16 13:25:37,887 epoch 26 - iter 60/155 - loss 0.45011655 - time (sec): 0.64 - samples/sec: 10945.57 - lr: 0.025000
2023-06-16 13:25:38,047 epoch 26 - iter 75/155 - loss 0.44903242 - time (sec): 0.80 - samples/sec: 10922.84 - lr: 0.025000
2023-06-16 13:25:38,204 epoch 26 - iter 90/155 - loss 0.44684485 - time (sec): 0.96 - samples/sec: 10892.12 - lr: 0.025

100%|██████████| 20/20 [00:00<00:00, 122.84it/s]

2023-06-16 13:25:39,060 Evaluating as a multi-label problem: False
2023-06-16 13:25:39,084 DEV : loss 0.42057371139526367 - f1-score (micro avg)  0.8155





2023-06-16 13:25:39,097 BAD EPOCHS (no improvement): 1
2023-06-16 13:25:39,101 ----------------------------------------------------------------------------------------------------
2023-06-16 13:25:39,259 epoch 27 - iter 15/155 - loss 0.44041904 - time (sec): 0.16 - samples/sec: 10661.25 - lr: 0.025000
2023-06-16 13:25:39,469 epoch 27 - iter 30/155 - loss 0.44493780 - time (sec): 0.37 - samples/sec: 9513.79 - lr: 0.025000
2023-06-16 13:25:39,684 epoch 27 - iter 45/155 - loss 0.44480970 - time (sec): 0.58 - samples/sec: 9045.84 - lr: 0.025000
2023-06-16 13:25:39,873 epoch 27 - iter 60/155 - loss 0.43755699 - time (sec): 0.77 - samples/sec: 9049.52 - lr: 0.025000
2023-06-16 13:25:40,058 epoch 27 - iter 75/155 - loss 0.43886947 - time (sec): 0.95 - samples/sec: 9052.36 - lr: 0.025000
2023-06-16 13:25:40,229 epoch 27 - iter 90/155 - loss 0.43795199 - time (sec): 1.13 - samples/sec: 9224.64 - lr: 0.025000
2023-06-16 13:25:40,416 epoch 27 - iter 105/155 - loss 0.43816993 - time (sec): 1.31 - 

100%|██████████| 20/20 [00:00<00:00, 113.98it/s]

2023-06-16 13:25:41,215 Evaluating as a multi-label problem: False





2023-06-16 13:25:41,247 DEV : loss 0.42626386880874634 - f1-score (micro avg)  0.8155
2023-06-16 13:25:41,263 BAD EPOCHS (no improvement): 2
2023-06-16 13:25:41,280 ----------------------------------------------------------------------------------------------------
2023-06-16 13:25:41,462 epoch 28 - iter 15/155 - loss 0.44131598 - time (sec): 0.18 - samples/sec: 9900.04 - lr: 0.025000
2023-06-16 13:25:41,664 epoch 28 - iter 30/155 - loss 0.44927543 - time (sec): 0.38 - samples/sec: 9293.28 - lr: 0.025000
2023-06-16 13:25:41,822 epoch 28 - iter 45/155 - loss 0.45007433 - time (sec): 0.54 - samples/sec: 9754.89 - lr: 0.025000
2023-06-16 13:25:41,990 epoch 28 - iter 60/155 - loss 0.44707305 - time (sec): 0.71 - samples/sec: 9944.00 - lr: 0.025000
2023-06-16 13:25:42,175 epoch 28 - iter 75/155 - loss 0.44294861 - time (sec): 0.89 - samples/sec: 9829.91 - lr: 0.025000
2023-06-16 13:25:42,335 epoch 28 - iter 90/155 - loss 0.44133677 - time (sec): 1.05 - samples/sec: 9956.02 - lr: 0.025000
20

100%|██████████| 20/20 [00:00<00:00, 88.58it/s] 

2023-06-16 13:25:43,443 Evaluating as a multi-label problem: False
2023-06-16 13:25:43,477 DEV : loss 0.42646095156669617 - f1-score (micro avg)  0.8132
2023-06-16 13:25:43,493 BAD EPOCHS (no improvement): 3
2023-06-16 13:25:43,500 ----------------------------------------------------------------------------------------------------





2023-06-16 13:25:43,742 epoch 29 - iter 15/155 - loss 0.45397069 - time (sec): 0.24 - samples/sec: 7483.76 - lr: 0.025000
2023-06-16 13:25:43,930 epoch 29 - iter 30/155 - loss 0.45534755 - time (sec): 0.43 - samples/sec: 8301.45 - lr: 0.025000
2023-06-16 13:25:44,123 epoch 29 - iter 45/155 - loss 0.44355759 - time (sec): 0.62 - samples/sec: 8612.95 - lr: 0.025000
2023-06-16 13:25:44,314 epoch 29 - iter 60/155 - loss 0.44203667 - time (sec): 0.81 - samples/sec: 8685.83 - lr: 0.025000
2023-06-16 13:25:44,512 epoch 29 - iter 75/155 - loss 0.43663059 - time (sec): 1.01 - samples/sec: 8718.31 - lr: 0.025000
2023-06-16 13:25:44,727 epoch 29 - iter 90/155 - loss 0.43515234 - time (sec): 1.22 - samples/sec: 8608.89 - lr: 0.025000
2023-06-16 13:25:44,933 epoch 29 - iter 105/155 - loss 0.43672564 - time (sec): 1.43 - samples/sec: 8526.28 - lr: 0.025000
2023-06-16 13:25:45,152 epoch 29 - iter 120/155 - loss 0.43701374 - time (sec): 1.65 - samples/sec: 8439.69 - lr: 0.025000
2023-06-16 13:25:45,35

100%|██████████| 20/20 [00:00<00:00, 81.03it/s]

2023-06-16 13:25:45,892 Evaluating as a multi-label problem: False
2023-06-16 13:25:45,935 DEV : loss 0.42079195380210876 - f1-score (micro avg)  0.8141





2023-06-16 13:25:45,955 Epoch    29: reducing learning rate of group 0 to 1.2500e-02.
2023-06-16 13:25:45,958 BAD EPOCHS (no improvement): 4
2023-06-16 13:25:45,965 ----------------------------------------------------------------------------------------------------
2023-06-16 13:25:46,181 epoch 30 - iter 15/155 - loss 0.43085497 - time (sec): 0.21 - samples/sec: 7921.13 - lr: 0.012500
2023-06-16 13:25:46,377 epoch 30 - iter 30/155 - loss 0.43234986 - time (sec): 0.41 - samples/sec: 8433.76 - lr: 0.012500
2023-06-16 13:25:46,597 epoch 30 - iter 45/155 - loss 0.43412229 - time (sec): 0.63 - samples/sec: 8363.14 - lr: 0.012500
2023-06-16 13:25:46,802 epoch 30 - iter 60/155 - loss 0.43056084 - time (sec): 0.83 - samples/sec: 8408.22 - lr: 0.012500
2023-06-16 13:25:47,011 epoch 30 - iter 75/155 - loss 0.43466206 - time (sec): 1.04 - samples/sec: 8413.60 - lr: 0.012500
2023-06-16 13:25:47,216 epoch 30 - iter 90/155 - loss 0.43694451 - time (sec): 1.25 - samples/sec: 8423.29 - lr: 0.012500
20

100%|██████████| 20/20 [00:00<00:00, 107.09it/s]


2023-06-16 13:25:48,259 Evaluating as a multi-label problem: False
2023-06-16 13:25:48,293 DEV : loss 0.424318790435791 - f1-score (micro avg)  0.8164
2023-06-16 13:25:48,304 BAD EPOCHS (no improvement): 1
2023-06-16 13:25:48,308 ----------------------------------------------------------------------------------------------------
2023-06-16 13:25:48,481 epoch 31 - iter 15/155 - loss 0.43167264 - time (sec): 0.17 - samples/sec: 9885.44 - lr: 0.012500
2023-06-16 13:25:48,669 epoch 31 - iter 30/155 - loss 0.44938866 - time (sec): 0.36 - samples/sec: 9681.04 - lr: 0.012500
2023-06-16 13:25:48,863 epoch 31 - iter 45/155 - loss 0.44858374 - time (sec): 0.55 - samples/sec: 9437.22 - lr: 0.012500
2023-06-16 13:25:49,034 epoch 31 - iter 60/155 - loss 0.44233112 - time (sec): 0.72 - samples/sec: 9461.51 - lr: 0.012500
2023-06-16 13:25:49,230 epoch 31 - iter 75/155 - loss 0.44205064 - time (sec): 0.92 - samples/sec: 9379.92 - lr: 0.012500
2023-06-16 13:25:49,402 epoch 31 - iter 90/155 - loss 0.440

100%|██████████| 20/20 [00:00<00:00, 110.93it/s]

2023-06-16 13:25:50,388 Evaluating as a multi-label problem: False





2023-06-16 13:25:50,418 DEV : loss 0.42279312014579773 - f1-score (micro avg)  0.8146
2023-06-16 13:25:50,430 BAD EPOCHS (no improvement): 2
2023-06-16 13:25:50,435 ----------------------------------------------------------------------------------------------------
2023-06-16 13:25:50,603 epoch 32 - iter 15/155 - loss 0.43797827 - time (sec): 0.17 - samples/sec: 10556.73 - lr: 0.012500
2023-06-16 13:25:50,794 epoch 32 - iter 30/155 - loss 0.42376477 - time (sec): 0.36 - samples/sec: 9886.74 - lr: 0.012500
2023-06-16 13:25:50,975 epoch 32 - iter 45/155 - loss 0.43187954 - time (sec): 0.54 - samples/sec: 9866.05 - lr: 0.012500
2023-06-16 13:25:51,146 epoch 32 - iter 60/155 - loss 0.43239027 - time (sec): 0.71 - samples/sec: 9942.35 - lr: 0.012500
2023-06-16 13:25:51,322 epoch 32 - iter 75/155 - loss 0.42706149 - time (sec): 0.88 - samples/sec: 9905.11 - lr: 0.012500
2023-06-16 13:25:51,496 epoch 32 - iter 90/155 - loss 0.43219023 - time (sec): 1.06 - samples/sec: 9882.57 - lr: 0.012500
2

100%|██████████| 20/20 [00:00<00:00, 104.55it/s]


2023-06-16 13:25:52,497 Evaluating as a multi-label problem: False
2023-06-16 13:25:52,524 DEV : loss 0.4188425838947296 - f1-score (micro avg)  0.8164
2023-06-16 13:25:52,539 BAD EPOCHS (no improvement): 3
2023-06-16 13:25:52,545 ----------------------------------------------------------------------------------------------------
2023-06-16 13:25:52,740 epoch 33 - iter 15/155 - loss 0.44905547 - time (sec): 0.19 - samples/sec: 8953.35 - lr: 0.012500
2023-06-16 13:25:52,936 epoch 33 - iter 30/155 - loss 0.45419117 - time (sec): 0.39 - samples/sec: 8954.00 - lr: 0.012500
2023-06-16 13:25:53,103 epoch 33 - iter 45/155 - loss 0.43994802 - time (sec): 0.55 - samples/sec: 9262.34 - lr: 0.012500
2023-06-16 13:25:53,298 epoch 33 - iter 60/155 - loss 0.43764514 - time (sec): 0.75 - samples/sec: 9192.72 - lr: 0.012500
2023-06-16 13:25:53,484 epoch 33 - iter 75/155 - loss 0.43200314 - time (sec): 0.94 - samples/sec: 9202.86 - lr: 0.012500
2023-06-16 13:25:53,670 epoch 33 - iter 90/155 - loss 0.42

100%|██████████| 20/20 [00:00<00:00, 101.37it/s]


2023-06-16 13:25:54,701 Evaluating as a multi-label problem: False
2023-06-16 13:25:54,725 DEV : loss 0.41827893257141113 - f1-score (micro avg)  0.8186
2023-06-16 13:25:54,738 Epoch    33: reducing learning rate of group 0 to 6.2500e-03.
2023-06-16 13:25:54,739 BAD EPOCHS (no improvement): 4
2023-06-16 13:25:54,745 ----------------------------------------------------------------------------------------------------
2023-06-16 13:25:54,909 epoch 34 - iter 15/155 - loss 0.45021016 - time (sec): 0.16 - samples/sec: 10553.48 - lr: 0.006250
2023-06-16 13:25:55,085 epoch 34 - iter 30/155 - loss 0.44011159 - time (sec): 0.34 - samples/sec: 10080.10 - lr: 0.006250
2023-06-16 13:25:55,260 epoch 34 - iter 45/155 - loss 0.43782203 - time (sec): 0.51 - samples/sec: 10117.37 - lr: 0.006250
2023-06-16 13:25:55,454 epoch 34 - iter 60/155 - loss 0.44027785 - time (sec): 0.71 - samples/sec: 9763.20 - lr: 0.006250
2023-06-16 13:25:55,627 epoch 34 - iter 75/155 - loss 0.43335353 - time (sec): 0.88 - samp

100%|██████████| 20/20 [00:00<00:00, 129.80it/s]

2023-06-16 13:25:56,639 Evaluating as a multi-label problem: False
2023-06-16 13:25:56,662 DEV : loss 0.42004600167274475 - f1-score (micro avg)  0.8182
2023-06-16 13:25:56,673 BAD EPOCHS (no improvement): 1





2023-06-16 13:25:56,679 ----------------------------------------------------------------------------------------------------
2023-06-16 13:25:56,841 epoch 35 - iter 15/155 - loss 0.44880619 - time (sec): 0.16 - samples/sec: 10585.60 - lr: 0.006250
2023-06-16 13:25:57,000 epoch 35 - iter 30/155 - loss 0.44176804 - time (sec): 0.32 - samples/sec: 10881.01 - lr: 0.006250
2023-06-16 13:25:57,168 epoch 35 - iter 45/155 - loss 0.43569555 - time (sec): 0.49 - samples/sec: 10715.18 - lr: 0.006250
2023-06-16 13:25:57,329 epoch 35 - iter 60/155 - loss 0.43335595 - time (sec): 0.65 - samples/sec: 10672.12 - lr: 0.006250
2023-06-16 13:25:57,490 epoch 35 - iter 75/155 - loss 0.43477800 - time (sec): 0.81 - samples/sec: 10732.03 - lr: 0.006250
2023-06-16 13:25:57,647 epoch 35 - iter 90/155 - loss 0.43432790 - time (sec): 0.97 - samples/sec: 10825.75 - lr: 0.006250
2023-06-16 13:25:57,827 epoch 35 - iter 105/155 - loss 0.43240507 - time (sec): 1.15 - samples/sec: 10616.08 - lr: 0.006250
2023-06-16 13

100%|██████████| 20/20 [00:00<00:00, 86.95it/s]

2023-06-16 13:25:58,666 Evaluating as a multi-label problem: False
2023-06-16 13:25:58,698 DEV : loss 0.41801196336746216 - f1-score (micro avg)  0.8186
2023-06-16 13:25:58,712 BAD EPOCHS (no improvement): 2
2023-06-16 13:25:58,717 ----------------------------------------------------------------------------------------------------





2023-06-16 13:25:58,902 epoch 36 - iter 15/155 - loss 0.42992840 - time (sec): 0.18 - samples/sec: 9677.82 - lr: 0.006250
2023-06-16 13:25:59,088 epoch 36 - iter 30/155 - loss 0.42437268 - time (sec): 0.37 - samples/sec: 9487.69 - lr: 0.006250
2023-06-16 13:25:59,261 epoch 36 - iter 45/155 - loss 0.42986976 - time (sec): 0.54 - samples/sec: 9709.70 - lr: 0.006250
2023-06-16 13:25:59,445 epoch 36 - iter 60/155 - loss 0.43546348 - time (sec): 0.73 - samples/sec: 9611.52 - lr: 0.006250
2023-06-16 13:25:59,618 epoch 36 - iter 75/155 - loss 0.43046747 - time (sec): 0.90 - samples/sec: 9656.07 - lr: 0.006250
2023-06-16 13:25:59,793 epoch 36 - iter 90/155 - loss 0.42995858 - time (sec): 1.07 - samples/sec: 9782.41 - lr: 0.006250
2023-06-16 13:25:59,954 epoch 36 - iter 105/155 - loss 0.42820845 - time (sec): 1.23 - samples/sec: 9933.09 - lr: 0.006250
2023-06-16 13:26:00,130 epoch 36 - iter 120/155 - loss 0.43065659 - time (sec): 1.41 - samples/sec: 9873.87 - lr: 0.006250
2023-06-16 13:26:00,31

100%|██████████| 20/20 [00:00<00:00, 94.76it/s] 

2023-06-16 13:26:00,789 Evaluating as a multi-label problem: False
2023-06-16 13:26:00,817 DEV : loss 0.4195702075958252 - f1-score (micro avg)  0.8164
2023-06-16 13:26:00,831 BAD EPOCHS (no improvement): 3
2023-06-16 13:26:00,836 ----------------------------------------------------------------------------------------------------





2023-06-16 13:26:01,020 epoch 37 - iter 15/155 - loss 0.42331132 - time (sec): 0.18 - samples/sec: 9544.20 - lr: 0.006250
2023-06-16 13:26:01,189 epoch 37 - iter 30/155 - loss 0.42499073 - time (sec): 0.35 - samples/sec: 9927.00 - lr: 0.006250
2023-06-16 13:26:01,366 epoch 37 - iter 45/155 - loss 0.42350593 - time (sec): 0.53 - samples/sec: 9967.40 - lr: 0.006250
2023-06-16 13:26:01,525 epoch 37 - iter 60/155 - loss 0.43181808 - time (sec): 0.69 - samples/sec: 10140.68 - lr: 0.006250
2023-06-16 13:26:01,689 epoch 37 - iter 75/155 - loss 0.43217665 - time (sec): 0.85 - samples/sec: 10182.08 - lr: 0.006250
2023-06-16 13:26:01,884 epoch 37 - iter 90/155 - loss 0.42640409 - time (sec): 1.05 - samples/sec: 9994.79 - lr: 0.006250
2023-06-16 13:26:02,064 epoch 37 - iter 105/155 - loss 0.42862929 - time (sec): 1.23 - samples/sec: 9906.35 - lr: 0.006250
2023-06-16 13:26:02,239 epoch 37 - iter 120/155 - loss 0.42938726 - time (sec): 1.40 - samples/sec: 9956.92 - lr: 0.006250
2023-06-16 13:26:02,

100%|██████████| 20/20 [00:00<00:00, 119.38it/s]

2023-06-16 13:26:02,837 Evaluating as a multi-label problem: False





2023-06-16 13:26:02,862 DEV : loss 0.41919171810150146 - f1-score (micro avg)  0.8182
2023-06-16 13:26:02,874 Epoch    37: reducing learning rate of group 0 to 3.1250e-03.
2023-06-16 13:26:02,876 BAD EPOCHS (no improvement): 4
2023-06-16 13:26:02,881 ----------------------------------------------------------------------------------------------------
2023-06-16 13:26:03,054 epoch 38 - iter 15/155 - loss 0.41664768 - time (sec): 0.17 - samples/sec: 11053.71 - lr: 0.003125
2023-06-16 13:26:03,218 epoch 38 - iter 30/155 - loss 0.43045773 - time (sec): 0.34 - samples/sec: 11066.52 - lr: 0.003125
2023-06-16 13:26:03,379 epoch 38 - iter 45/155 - loss 0.43042579 - time (sec): 0.50 - samples/sec: 10874.14 - lr: 0.003125
2023-06-16 13:26:03,536 epoch 38 - iter 60/155 - loss 0.42314661 - time (sec): 0.65 - samples/sec: 10855.14 - lr: 0.003125
2023-06-16 13:26:03,686 epoch 38 - iter 75/155 - loss 0.42471302 - time (sec): 0.80 - samples/sec: 10850.45 - lr: 0.003125
2023-06-16 13:26:03,851 epoch 38 

100%|██████████| 20/20 [00:00<00:00, 123.65it/s]

2023-06-16 13:26:04,711 Evaluating as a multi-label problem: False
2023-06-16 13:26:04,736 DEV : loss 0.4179878234863281 - f1-score (micro avg)  0.8186





2023-06-16 13:26:04,749 BAD EPOCHS (no improvement): 1
2023-06-16 13:26:04,753 ----------------------------------------------------------------------------------------------------
2023-06-16 13:26:04,911 epoch 39 - iter 15/155 - loss 0.44727905 - time (sec): 0.16 - samples/sec: 11123.52 - lr: 0.003125
2023-06-16 13:26:05,068 epoch 39 - iter 30/155 - loss 0.43350613 - time (sec): 0.31 - samples/sec: 11204.59 - lr: 0.003125
2023-06-16 13:26:05,226 epoch 39 - iter 45/155 - loss 0.43598311 - time (sec): 0.47 - samples/sec: 11156.98 - lr: 0.003125
2023-06-16 13:26:05,382 epoch 39 - iter 60/155 - loss 0.43493672 - time (sec): 0.63 - samples/sec: 11067.93 - lr: 0.003125
2023-06-16 13:26:05,546 epoch 39 - iter 75/155 - loss 0.43446607 - time (sec): 0.79 - samples/sec: 11021.99 - lr: 0.003125
2023-06-16 13:26:05,706 epoch 39 - iter 90/155 - loss 0.42974611 - time (sec): 0.95 - samples/sec: 10974.75 - lr: 0.003125
2023-06-16 13:26:05,865 epoch 39 - iter 105/155 - loss 0.43216929 - time (sec): 1.

100%|██████████| 20/20 [00:00<00:00, 35.41it/s]

2023-06-16 13:26:06,986 Evaluating as a multi-label problem: False
2023-06-16 13:26:07,012 DEV : loss 0.41711634397506714 - f1-score (micro avg)  0.8182
2023-06-16 13:26:07,024 BAD EPOCHS (no improvement): 2
2023-06-16 13:26:07,028 ----------------------------------------------------------------------------------------------------





2023-06-16 13:26:07,194 epoch 40 - iter 15/155 - loss 0.42757627 - time (sec): 0.16 - samples/sec: 10814.67 - lr: 0.003125
2023-06-16 13:26:07,364 epoch 40 - iter 30/155 - loss 0.42979554 - time (sec): 0.33 - samples/sec: 10661.52 - lr: 0.003125
2023-06-16 13:26:07,531 epoch 40 - iter 45/155 - loss 0.43051775 - time (sec): 0.50 - samples/sec: 10581.68 - lr: 0.003125
2023-06-16 13:26:07,687 epoch 40 - iter 60/155 - loss 0.42866952 - time (sec): 0.66 - samples/sec: 10689.28 - lr: 0.003125
2023-06-16 13:26:07,846 epoch 40 - iter 75/155 - loss 0.42665067 - time (sec): 0.82 - samples/sec: 10779.31 - lr: 0.003125
2023-06-16 13:26:08,001 epoch 40 - iter 90/155 - loss 0.43282770 - time (sec): 0.97 - samples/sec: 10772.36 - lr: 0.003125
2023-06-16 13:26:08,171 epoch 40 - iter 105/155 - loss 0.43377308 - time (sec): 1.14 - samples/sec: 10685.98 - lr: 0.003125
2023-06-16 13:26:08,331 epoch 40 - iter 120/155 - loss 0.43275481 - time (sec): 1.30 - samples/sec: 10708.25 - lr: 0.003125
2023-06-16 13:

100%|██████████| 20/20 [00:00<00:00, 119.55it/s]

2023-06-16 13:26:08,895 Evaluating as a multi-label problem: False





2023-06-16 13:26:08,923 DEV : loss 0.4162313938140869 - f1-score (micro avg)  0.8159
2023-06-16 13:26:08,934 BAD EPOCHS (no improvement): 3
2023-06-16 13:26:08,939 ----------------------------------------------------------------------------------------------------
2023-06-16 13:26:09,109 epoch 41 - iter 15/155 - loss 0.44189781 - time (sec): 0.17 - samples/sec: 10567.61 - lr: 0.003125
2023-06-16 13:26:09,270 epoch 41 - iter 30/155 - loss 0.43360735 - time (sec): 0.33 - samples/sec: 10622.41 - lr: 0.003125
2023-06-16 13:26:09,428 epoch 41 - iter 45/155 - loss 0.43262650 - time (sec): 0.49 - samples/sec: 10690.12 - lr: 0.003125
2023-06-16 13:26:09,589 epoch 41 - iter 60/155 - loss 0.42599046 - time (sec): 0.65 - samples/sec: 10681.59 - lr: 0.003125
2023-06-16 13:26:09,752 epoch 41 - iter 75/155 - loss 0.42208635 - time (sec): 0.81 - samples/sec: 10740.18 - lr: 0.003125
2023-06-16 13:26:09,908 epoch 41 - iter 90/155 - loss 0.42460761 - time (sec): 0.97 - samples/sec: 10784.36 - lr: 0.0031

100%|██████████| 20/20 [00:00<00:00, 124.32it/s]

2023-06-16 13:26:10,775 Evaluating as a multi-label problem: False
2023-06-16 13:26:10,800 DEV : loss 0.4153900742530823 - f1-score (micro avg)  0.8159





2023-06-16 13:26:10,815 Epoch    41: reducing learning rate of group 0 to 1.5625e-03.
2023-06-16 13:26:10,817 BAD EPOCHS (no improvement): 4
2023-06-16 13:26:10,823 ----------------------------------------------------------------------------------------------------
2023-06-16 13:26:10,980 epoch 42 - iter 15/155 - loss 0.44115945 - time (sec): 0.16 - samples/sec: 11063.62 - lr: 0.001563
2023-06-16 13:26:11,139 epoch 42 - iter 30/155 - loss 0.43187104 - time (sec): 0.31 - samples/sec: 10964.96 - lr: 0.001563
2023-06-16 13:26:11,297 epoch 42 - iter 45/155 - loss 0.44268158 - time (sec): 0.47 - samples/sec: 10954.93 - lr: 0.001563
2023-06-16 13:26:11,458 epoch 42 - iter 60/155 - loss 0.44120661 - time (sec): 0.63 - samples/sec: 10979.06 - lr: 0.001563
2023-06-16 13:26:11,618 epoch 42 - iter 75/155 - loss 0.44151970 - time (sec): 0.79 - samples/sec: 10968.79 - lr: 0.001563
2023-06-16 13:26:11,776 epoch 42 - iter 90/155 - loss 0.43750484 - time (sec): 0.95 - samples/sec: 10972.83 - lr: 0.001

100%|██████████| 20/20 [00:00<00:00, 110.15it/s]

2023-06-16 13:26:12,665 Evaluating as a multi-label problem: False





2023-06-16 13:26:12,695 DEV : loss 0.4193614721298218 - f1-score (micro avg)  0.8177
2023-06-16 13:26:12,707 BAD EPOCHS (no improvement): 1
2023-06-16 13:26:12,711 ----------------------------------------------------------------------------------------------------
2023-06-16 13:26:12,894 epoch 43 - iter 15/155 - loss 0.42481953 - time (sec): 0.18 - samples/sec: 9801.98 - lr: 0.001563
2023-06-16 13:26:13,056 epoch 43 - iter 30/155 - loss 0.44278079 - time (sec): 0.34 - samples/sec: 10233.93 - lr: 0.001563
2023-06-16 13:26:13,223 epoch 43 - iter 45/155 - loss 0.44277903 - time (sec): 0.51 - samples/sec: 10280.17 - lr: 0.001563
2023-06-16 13:26:13,390 epoch 43 - iter 60/155 - loss 0.43801993 - time (sec): 0.68 - samples/sec: 10319.72 - lr: 0.001563
2023-06-16 13:26:13,561 epoch 43 - iter 75/155 - loss 0.43054693 - time (sec): 0.85 - samples/sec: 10304.57 - lr: 0.001563
2023-06-16 13:26:13,727 epoch 43 - iter 90/155 - loss 0.43055741 - time (sec): 1.01 - samples/sec: 10344.75 - lr: 0.00156

100%|██████████| 20/20 [00:00<00:00, 119.85it/s]

2023-06-16 13:26:14,659 Evaluating as a multi-label problem: False





2023-06-16 13:26:14,687 DEV : loss 0.41735711693763733 - f1-score (micro avg)  0.8182
2023-06-16 13:26:14,700 BAD EPOCHS (no improvement): 2
2023-06-16 13:26:14,705 ----------------------------------------------------------------------------------------------------
2023-06-16 13:26:14,897 epoch 44 - iter 15/155 - loss 0.43090493 - time (sec): 0.19 - samples/sec: 9195.19 - lr: 0.001563
2023-06-16 13:26:15,068 epoch 44 - iter 30/155 - loss 0.42134500 - time (sec): 0.36 - samples/sec: 9595.83 - lr: 0.001563
2023-06-16 13:26:15,236 epoch 44 - iter 45/155 - loss 0.42577768 - time (sec): 0.53 - samples/sec: 9874.92 - lr: 0.001563
2023-06-16 13:26:15,415 epoch 44 - iter 60/155 - loss 0.42245712 - time (sec): 0.71 - samples/sec: 9993.10 - lr: 0.001563
2023-06-16 13:26:15,583 epoch 44 - iter 75/155 - loss 0.42326558 - time (sec): 0.87 - samples/sec: 10030.37 - lr: 0.001563
2023-06-16 13:26:15,763 epoch 44 - iter 90/155 - loss 0.42561083 - time (sec): 1.05 - samples/sec: 9974.88 - lr: 0.001563
2

100%|██████████| 20/20 [00:00<00:00, 97.90it/s]

2023-06-16 13:26:16,765 Evaluating as a multi-label problem: False
2023-06-16 13:26:16,799 DEV : loss 0.4193620979785919 - f1-score (micro avg)  0.8213
2023-06-16 13:26:16,810 BAD EPOCHS (no improvement): 0
2023-06-16 13:26:16,815 saving best model





2023-06-16 13:26:20,984 ----------------------------------------------------------------------------------------------------
2023-06-16 13:26:21,155 epoch 45 - iter 15/155 - loss 0.41023384 - time (sec): 0.17 - samples/sec: 10328.64 - lr: 0.001563
2023-06-16 13:26:21,313 epoch 45 - iter 30/155 - loss 0.41508678 - time (sec): 0.32 - samples/sec: 10613.68 - lr: 0.001563
2023-06-16 13:26:21,470 epoch 45 - iter 45/155 - loss 0.42153240 - time (sec): 0.48 - samples/sec: 10657.97 - lr: 0.001563
2023-06-16 13:26:21,639 epoch 45 - iter 60/155 - loss 0.41868840 - time (sec): 0.65 - samples/sec: 10616.74 - lr: 0.001563
2023-06-16 13:26:21,811 epoch 45 - iter 75/155 - loss 0.42662530 - time (sec): 0.82 - samples/sec: 10484.29 - lr: 0.001563
2023-06-16 13:26:21,973 epoch 45 - iter 90/155 - loss 0.42567528 - time (sec): 0.98 - samples/sec: 10589.80 - lr: 0.001563
2023-06-16 13:26:22,145 epoch 45 - iter 105/155 - loss 0.42720191 - time (sec): 1.16 - samples/sec: 10502.82 - lr: 0.001563
2023-06-16 13

100%|██████████| 20/20 [00:00<00:00, 119.73it/s]

2023-06-16 13:26:22,868 Evaluating as a multi-label problem: False
2023-06-16 13:26:22,891 DEV : loss 0.4170915186405182 - f1-score (micro avg)  0.8168





2023-06-16 13:26:22,904 BAD EPOCHS (no improvement): 1
2023-06-16 13:26:22,908 ----------------------------------------------------------------------------------------------------
2023-06-16 13:26:23,066 epoch 46 - iter 15/155 - loss 0.44277177 - time (sec): 0.16 - samples/sec: 11031.76 - lr: 0.001563
2023-06-16 13:26:23,221 epoch 46 - iter 30/155 - loss 0.43765221 - time (sec): 0.31 - samples/sec: 10997.27 - lr: 0.001563
2023-06-16 13:26:23,382 epoch 46 - iter 45/155 - loss 0.43810996 - time (sec): 0.47 - samples/sec: 11168.93 - lr: 0.001563
2023-06-16 13:26:23,543 epoch 46 - iter 60/155 - loss 0.43304985 - time (sec): 0.63 - samples/sec: 11057.98 - lr: 0.001563
2023-06-16 13:26:23,697 epoch 46 - iter 75/155 - loss 0.43158261 - time (sec): 0.79 - samples/sec: 11134.29 - lr: 0.001563
2023-06-16 13:26:23,856 epoch 46 - iter 90/155 - loss 0.43371977 - time (sec): 0.95 - samples/sec: 11114.04 - lr: 0.001563
2023-06-16 13:26:24,012 epoch 46 - iter 105/155 - loss 0.43203639 - time (sec): 1.

100%|██████████| 20/20 [00:00<00:00, 124.26it/s]

2023-06-16 13:26:24,715 Evaluating as a multi-label problem: False





2023-06-16 13:26:24,748 DEV : loss 0.4189716875553131 - f1-score (micro avg)  0.8173
2023-06-16 13:26:24,760 BAD EPOCHS (no improvement): 2
2023-06-16 13:26:24,766 ----------------------------------------------------------------------------------------------------
2023-06-16 13:26:24,926 epoch 47 - iter 15/155 - loss 0.41877666 - time (sec): 0.16 - samples/sec: 11024.16 - lr: 0.001563
2023-06-16 13:26:25,085 epoch 47 - iter 30/155 - loss 0.42003709 - time (sec): 0.32 - samples/sec: 10972.13 - lr: 0.001563
2023-06-16 13:26:25,241 epoch 47 - iter 45/155 - loss 0.42694376 - time (sec): 0.47 - samples/sec: 11118.10 - lr: 0.001563
2023-06-16 13:26:25,395 epoch 47 - iter 60/155 - loss 0.42523205 - time (sec): 0.63 - samples/sec: 11189.33 - lr: 0.001563
2023-06-16 13:26:25,550 epoch 47 - iter 75/155 - loss 0.42796557 - time (sec): 0.78 - samples/sec: 11182.12 - lr: 0.001563
2023-06-16 13:26:25,705 epoch 47 - iter 90/155 - loss 0.42781820 - time (sec): 0.94 - samples/sec: 11142.26 - lr: 0.0015

100%|██████████| 20/20 [00:00<00:00, 124.33it/s]

2023-06-16 13:26:26,561 Evaluating as a multi-label problem: False
2023-06-16 13:26:26,585 DEV : loss 0.41747692227363586 - f1-score (micro avg)  0.8191





2023-06-16 13:26:26,599 BAD EPOCHS (no improvement): 3
2023-06-16 13:26:26,603 ----------------------------------------------------------------------------------------------------
2023-06-16 13:26:26,769 epoch 48 - iter 15/155 - loss 0.43019733 - time (sec): 0.16 - samples/sec: 10349.14 - lr: 0.001563
2023-06-16 13:26:26,929 epoch 48 - iter 30/155 - loss 0.42194904 - time (sec): 0.32 - samples/sec: 10629.66 - lr: 0.001563
2023-06-16 13:26:27,081 epoch 48 - iter 45/155 - loss 0.42189137 - time (sec): 0.48 - samples/sec: 11041.64 - lr: 0.001563
2023-06-16 13:26:27,268 epoch 48 - iter 60/155 - loss 0.42543317 - time (sec): 0.66 - samples/sec: 10624.35 - lr: 0.001563
2023-06-16 13:26:27,436 epoch 48 - iter 75/155 - loss 0.42657622 - time (sec): 0.83 - samples/sec: 10602.63 - lr: 0.001563
2023-06-16 13:26:27,602 epoch 48 - iter 90/155 - loss 0.42737797 - time (sec): 1.00 - samples/sec: 10639.94 - lr: 0.001563
2023-06-16 13:26:27,767 epoch 48 - iter 105/155 - loss 0.42841880 - time (sec): 1.

100%|██████████| 20/20 [00:00<00:00, 87.67it/s]

2023-06-16 13:26:28,601 Evaluating as a multi-label problem: False
2023-06-16 13:26:28,631 DEV : loss 0.4164004921913147 - f1-score (micro avg)  0.8182
2023-06-16 13:26:28,652 Epoch    48: reducing learning rate of group 0 to 7.8125e-04.
2023-06-16 13:26:28,653 BAD EPOCHS (no improvement): 4
2023-06-16 13:26:28,660 ----------------------------------------------------------------------------------------------------





2023-06-16 13:26:28,845 epoch 49 - iter 15/155 - loss 0.41519425 - time (sec): 0.18 - samples/sec: 9638.27 - lr: 0.000781
2023-06-16 13:26:29,028 epoch 49 - iter 30/155 - loss 0.42699255 - time (sec): 0.37 - samples/sec: 9719.69 - lr: 0.000781
2023-06-16 13:26:29,193 epoch 49 - iter 45/155 - loss 0.42259614 - time (sec): 0.53 - samples/sec: 9858.75 - lr: 0.000781
2023-06-16 13:26:29,383 epoch 49 - iter 60/155 - loss 0.43036907 - time (sec): 0.72 - samples/sec: 9707.81 - lr: 0.000781
2023-06-16 13:26:29,634 epoch 49 - iter 75/155 - loss 0.43067053 - time (sec): 0.97 - samples/sec: 8967.22 - lr: 0.000781
2023-06-16 13:26:29,827 epoch 49 - iter 90/155 - loss 0.43054722 - time (sec): 1.17 - samples/sec: 8919.75 - lr: 0.000781
2023-06-16 13:26:30,019 epoch 49 - iter 105/155 - loss 0.42802733 - time (sec): 1.36 - samples/sec: 8935.13 - lr: 0.000781
2023-06-16 13:26:30,223 epoch 49 - iter 120/155 - loss 0.42924653 - time (sec): 1.56 - samples/sec: 8867.94 - lr: 0.000781
2023-06-16 13:26:30,44

100%|██████████| 20/20 [00:00<00:00, 99.42it/s]

2023-06-16 13:26:30,936 Evaluating as a multi-label problem: False
2023-06-16 13:26:30,959 DEV : loss 0.41703277826309204 - f1-score (micro avg)  0.8195
2023-06-16 13:26:30,972 BAD EPOCHS (no improvement): 1
2023-06-16 13:26:30,978 ----------------------------------------------------------------------------------------------------





2023-06-16 13:26:31,213 epoch 50 - iter 15/155 - loss 0.42447261 - time (sec): 0.23 - samples/sec: 7523.53 - lr: 0.000781
2023-06-16 13:26:31,428 epoch 50 - iter 30/155 - loss 0.42957916 - time (sec): 0.45 - samples/sec: 7774.75 - lr: 0.000781
2023-06-16 13:26:31,620 epoch 50 - iter 45/155 - loss 0.42392828 - time (sec): 0.64 - samples/sec: 8162.00 - lr: 0.000781
2023-06-16 13:26:31,850 epoch 50 - iter 60/155 - loss 0.42252752 - time (sec): 0.87 - samples/sec: 7993.88 - lr: 0.000781
2023-06-16 13:26:32,049 epoch 50 - iter 75/155 - loss 0.42437278 - time (sec): 1.07 - samples/sec: 8157.07 - lr: 0.000781
2023-06-16 13:26:32,271 epoch 50 - iter 90/155 - loss 0.42391005 - time (sec): 1.29 - samples/sec: 8167.28 - lr: 0.000781
2023-06-16 13:26:32,467 epoch 50 - iter 105/155 - loss 0.42681982 - time (sec): 1.49 - samples/sec: 8254.00 - lr: 0.000781
2023-06-16 13:26:32,654 epoch 50 - iter 120/155 - loss 0.42694770 - time (sec): 1.67 - samples/sec: 8351.10 - lr: 0.000781
2023-06-16 13:26:32,83

100%|██████████| 20/20 [00:00<00:00, 111.04it/s]


2023-06-16 13:26:33,302 Evaluating as a multi-label problem: False
2023-06-16 13:26:33,339 DEV : loss 0.41687554121017456 - f1-score (micro avg)  0.8191
2023-06-16 13:26:33,361 BAD EPOCHS (no improvement): 2
2023-06-16 13:26:33,366 ----------------------------------------------------------------------------------------------------
2023-06-16 13:26:33,559 epoch 51 - iter 15/155 - loss 0.41583811 - time (sec): 0.19 - samples/sec: 9495.10 - lr: 0.000781
2023-06-16 13:26:33,750 epoch 51 - iter 30/155 - loss 0.43158965 - time (sec): 0.38 - samples/sec: 9512.39 - lr: 0.000781
2023-06-16 13:26:33,950 epoch 51 - iter 45/155 - loss 0.42917761 - time (sec): 0.58 - samples/sec: 9185.48 - lr: 0.000781
2023-06-16 13:26:34,139 epoch 51 - iter 60/155 - loss 0.43075334 - time (sec): 0.77 - samples/sec: 9064.74 - lr: 0.000781
2023-06-16 13:26:34,337 epoch 51 - iter 75/155 - loss 0.42881905 - time (sec): 0.97 - samples/sec: 8996.10 - lr: 0.000781
2023-06-16 13:26:34,518 epoch 51 - iter 90/155 - loss 0.4

100%|██████████| 20/20 [00:00<00:00, 103.55it/s]


2023-06-16 13:26:35,579 Evaluating as a multi-label problem: False
2023-06-16 13:26:35,604 DEV : loss 0.4169236421585083 - f1-score (micro avg)  0.8191
2023-06-16 13:26:35,618 BAD EPOCHS (no improvement): 3
2023-06-16 13:26:35,624 ----------------------------------------------------------------------------------------------------
2023-06-16 13:26:35,813 epoch 52 - iter 15/155 - loss 0.42625000 - time (sec): 0.19 - samples/sec: 9944.06 - lr: 0.000781
2023-06-16 13:26:36,010 epoch 52 - iter 30/155 - loss 0.41724887 - time (sec): 0.38 - samples/sec: 9383.60 - lr: 0.000781
2023-06-16 13:26:36,214 epoch 52 - iter 45/155 - loss 0.43095185 - time (sec): 0.59 - samples/sec: 9051.96 - lr: 0.000781
2023-06-16 13:26:36,397 epoch 52 - iter 60/155 - loss 0.43401574 - time (sec): 0.77 - samples/sec: 9144.18 - lr: 0.000781
2023-06-16 13:26:36,598 epoch 52 - iter 75/155 - loss 0.43441752 - time (sec): 0.97 - samples/sec: 8992.68 - lr: 0.000781
2023-06-16 13:26:36,788 epoch 52 - iter 90/155 - loss 0.43

100%|██████████| 20/20 [00:00<00:00, 93.21it/s]

2023-06-16 13:26:37,892 Evaluating as a multi-label problem: False
2023-06-16 13:26:37,919 DEV : loss 0.4176273047924042 - f1-score (micro avg)  0.8173
2023-06-16 13:26:37,933 Epoch    52: reducing learning rate of group 0 to 3.9063e-04.
2023-06-16 13:26:37,939 BAD EPOCHS (no improvement): 4
2023-06-16 13:26:37,945 ----------------------------------------------------------------------------------------------------





2023-06-16 13:26:38,160 epoch 53 - iter 15/155 - loss 0.42908133 - time (sec): 0.21 - samples/sec: 8423.63 - lr: 0.000391
2023-06-16 13:26:38,342 epoch 53 - iter 30/155 - loss 0.43570036 - time (sec): 0.39 - samples/sec: 8968.93 - lr: 0.000391
2023-06-16 13:26:38,531 epoch 53 - iter 45/155 - loss 0.43551611 - time (sec): 0.58 - samples/sec: 8925.26 - lr: 0.000391
2023-06-16 13:26:38,720 epoch 53 - iter 60/155 - loss 0.43538783 - time (sec): 0.77 - samples/sec: 9001.03 - lr: 0.000391
2023-06-16 13:26:38,908 epoch 53 - iter 75/155 - loss 0.43009953 - time (sec): 0.96 - samples/sec: 9093.65 - lr: 0.000391
2023-06-16 13:26:39,094 epoch 53 - iter 90/155 - loss 0.42898265 - time (sec): 1.15 - samples/sec: 9158.08 - lr: 0.000391
2023-06-16 13:26:39,274 epoch 53 - iter 105/155 - loss 0.42738379 - time (sec): 1.33 - samples/sec: 9222.42 - lr: 0.000391
2023-06-16 13:26:39,448 epoch 53 - iter 120/155 - loss 0.42703209 - time (sec): 1.50 - samples/sec: 9216.44 - lr: 0.000391
2023-06-16 13:26:39,62

100%|██████████| 20/20 [00:00<00:00, 94.13it/s]

2023-06-16 13:26:40,124 Evaluating as a multi-label problem: False
2023-06-16 13:26:40,161 DEV : loss 0.4172888994216919 - f1-score (micro avg)  0.8173
2023-06-16 13:26:40,179 BAD EPOCHS (no improvement): 1
2023-06-16 13:26:40,184 ----------------------------------------------------------------------------------------------------





2023-06-16 13:26:40,371 epoch 54 - iter 15/155 - loss 0.41761842 - time (sec): 0.19 - samples/sec: 9196.32 - lr: 0.000391
2023-06-16 13:26:40,563 epoch 54 - iter 30/155 - loss 0.41631636 - time (sec): 0.38 - samples/sec: 9098.69 - lr: 0.000391
2023-06-16 13:26:40,745 epoch 54 - iter 45/155 - loss 0.41883790 - time (sec): 0.56 - samples/sec: 9304.74 - lr: 0.000391
2023-06-16 13:26:40,936 epoch 54 - iter 60/155 - loss 0.42068450 - time (sec): 0.75 - samples/sec: 9316.99 - lr: 0.000391
2023-06-16 13:26:41,136 epoch 54 - iter 75/155 - loss 0.42144654 - time (sec): 0.95 - samples/sec: 9220.89 - lr: 0.000391
2023-06-16 13:26:41,315 epoch 54 - iter 90/155 - loss 0.42488822 - time (sec): 1.13 - samples/sec: 9261.72 - lr: 0.000391
2023-06-16 13:26:41,513 epoch 54 - iter 105/155 - loss 0.42486647 - time (sec): 1.33 - samples/sec: 9198.07 - lr: 0.000391
2023-06-16 13:26:41,714 epoch 54 - iter 120/155 - loss 0.42632782 - time (sec): 1.53 - samples/sec: 9156.47 - lr: 0.000391
2023-06-16 13:26:41,90

100%|██████████| 20/20 [00:00<00:00, 100.75it/s]

2023-06-16 13:26:42,355 Evaluating as a multi-label problem: False
2023-06-16 13:26:42,400 DEV : loss 0.41732069849967957 - f1-score (micro avg)  0.8186
2023-06-16 13:26:42,418 BAD EPOCHS (no improvement): 2
2023-06-16 13:26:42,426 ----------------------------------------------------------------------------------------------------





2023-06-16 13:26:42,599 epoch 55 - iter 15/155 - loss 0.43185741 - time (sec): 0.17 - samples/sec: 10273.45 - lr: 0.000391
2023-06-16 13:26:42,771 epoch 55 - iter 30/155 - loss 0.42187186 - time (sec): 0.34 - samples/sec: 10358.14 - lr: 0.000391
2023-06-16 13:26:42,961 epoch 55 - iter 45/155 - loss 0.41813371 - time (sec): 0.53 - samples/sec: 9802.66 - lr: 0.000391
2023-06-16 13:26:43,139 epoch 55 - iter 60/155 - loss 0.41813045 - time (sec): 0.71 - samples/sec: 9765.41 - lr: 0.000391
2023-06-16 13:26:43,321 epoch 55 - iter 75/155 - loss 0.42258676 - time (sec): 0.89 - samples/sec: 9703.57 - lr: 0.000391
2023-06-16 13:26:43,496 epoch 55 - iter 90/155 - loss 0.42519999 - time (sec): 1.07 - samples/sec: 9749.82 - lr: 0.000391
2023-06-16 13:26:43,677 epoch 55 - iter 105/155 - loss 0.43026171 - time (sec): 1.25 - samples/sec: 9727.22 - lr: 0.000391
2023-06-16 13:26:43,886 epoch 55 - iter 120/155 - loss 0.43192794 - time (sec): 1.46 - samples/sec: 9496.44 - lr: 0.000391
2023-06-16 13:26:44,

100%|██████████| 20/20 [00:00<00:00, 32.03it/s] 

2023-06-16 13:26:44,959 Evaluating as a multi-label problem: False
2023-06-16 13:26:44,986 DEV : loss 0.4170987010002136 - f1-score (micro avg)  0.8186
2023-06-16 13:26:45,001 BAD EPOCHS (no improvement): 3
2023-06-16 13:26:45,009 ----------------------------------------------------------------------------------------------------





2023-06-16 13:26:45,195 epoch 56 - iter 15/155 - loss 0.40786237 - time (sec): 0.18 - samples/sec: 9794.65 - lr: 0.000391
2023-06-16 13:26:45,372 epoch 56 - iter 30/155 - loss 0.41972578 - time (sec): 0.36 - samples/sec: 9856.81 - lr: 0.000391
2023-06-16 13:26:45,543 epoch 56 - iter 45/155 - loss 0.42426651 - time (sec): 0.53 - samples/sec: 9968.38 - lr: 0.000391
2023-06-16 13:26:45,724 epoch 56 - iter 60/155 - loss 0.42728643 - time (sec): 0.71 - samples/sec: 9833.29 - lr: 0.000391
2023-06-16 13:26:45,903 epoch 56 - iter 75/155 - loss 0.42888941 - time (sec): 0.89 - samples/sec: 9816.29 - lr: 0.000391
2023-06-16 13:26:46,084 epoch 56 - iter 90/155 - loss 0.42905012 - time (sec): 1.07 - samples/sec: 9795.28 - lr: 0.000391
2023-06-16 13:26:46,284 epoch 56 - iter 105/155 - loss 0.42972786 - time (sec): 1.27 - samples/sec: 9631.15 - lr: 0.000391
2023-06-16 13:26:46,457 epoch 56 - iter 120/155 - loss 0.43005415 - time (sec): 1.45 - samples/sec: 9650.50 - lr: 0.000391
2023-06-16 13:26:46,62

100%|██████████| 20/20 [00:00<00:00, 110.44it/s]

2023-06-16 13:26:47,058 Evaluating as a multi-label problem: False





2023-06-16 13:26:47,086 DEV : loss 0.41708436608314514 - f1-score (micro avg)  0.8186
2023-06-16 13:26:47,098 Epoch    56: reducing learning rate of group 0 to 1.9531e-04.
2023-06-16 13:26:47,099 BAD EPOCHS (no improvement): 4
2023-06-16 13:26:47,104 ----------------------------------------------------------------------------------------------------
2023-06-16 13:26:47,273 epoch 57 - iter 15/155 - loss 0.42779814 - time (sec): 0.17 - samples/sec: 10520.06 - lr: 0.000195
2023-06-16 13:26:47,428 epoch 57 - iter 30/155 - loss 0.42308348 - time (sec): 0.32 - samples/sec: 10831.97 - lr: 0.000195
2023-06-16 13:26:47,590 epoch 57 - iter 45/155 - loss 0.43021778 - time (sec): 0.48 - samples/sec: 10807.49 - lr: 0.000195
2023-06-16 13:26:47,757 epoch 57 - iter 60/155 - loss 0.43306949 - time (sec): 0.65 - samples/sec: 10793.18 - lr: 0.000195
2023-06-16 13:26:47,920 epoch 57 - iter 75/155 - loss 0.43437328 - time (sec): 0.82 - samples/sec: 10745.31 - lr: 0.000195
2023-06-16 13:26:48,080 epoch 57 

100%|██████████| 20/20 [00:00<00:00, 118.08it/s]

2023-06-16 13:26:48,968 Evaluating as a multi-label problem: False





2023-06-16 13:26:48,998 DEV : loss 0.4167451560497284 - f1-score (micro avg)  0.8191
2023-06-16 13:26:49,010 BAD EPOCHS (no improvement): 1
2023-06-16 13:26:49,015 ----------------------------------------------------------------------------------------------------
2023-06-16 13:26:49,199 epoch 58 - iter 15/155 - loss 0.43125271 - time (sec): 0.18 - samples/sec: 9639.93 - lr: 0.000195
2023-06-16 13:26:49,370 epoch 58 - iter 30/155 - loss 0.42873174 - time (sec): 0.35 - samples/sec: 9993.30 - lr: 0.000195
2023-06-16 13:26:49,536 epoch 58 - iter 45/155 - loss 0.43386158 - time (sec): 0.52 - samples/sec: 10243.31 - lr: 0.000195
2023-06-16 13:26:49,704 epoch 58 - iter 60/155 - loss 0.43617289 - time (sec): 0.69 - samples/sec: 10264.15 - lr: 0.000195
2023-06-16 13:26:49,874 epoch 58 - iter 75/155 - loss 0.43408101 - time (sec): 0.86 - samples/sec: 10199.21 - lr: 0.000195
2023-06-16 13:26:50,038 epoch 58 - iter 90/155 - loss 0.43553812 - time (sec): 1.02 - samples/sec: 10268.15 - lr: 0.000195

100%|██████████| 20/20 [00:00<00:00, 121.34it/s]

2023-06-16 13:26:50,941 Evaluating as a multi-label problem: False





2023-06-16 13:26:50,971 DEV : loss 0.41684049367904663 - f1-score (micro avg)  0.8177
2023-06-16 13:26:50,984 BAD EPOCHS (no improvement): 2
2023-06-16 13:26:50,989 ----------------------------------------------------------------------------------------------------
2023-06-16 13:26:51,165 epoch 59 - iter 15/155 - loss 0.41941303 - time (sec): 0.17 - samples/sec: 9846.93 - lr: 0.000195
2023-06-16 13:26:51,347 epoch 59 - iter 30/155 - loss 0.42755748 - time (sec): 0.36 - samples/sec: 9603.12 - lr: 0.000195
2023-06-16 13:26:51,516 epoch 59 - iter 45/155 - loss 0.42735622 - time (sec): 0.52 - samples/sec: 9847.96 - lr: 0.000195
2023-06-16 13:26:51,682 epoch 59 - iter 60/155 - loss 0.43175936 - time (sec): 0.69 - samples/sec: 10041.25 - lr: 0.000195
2023-06-16 13:26:51,849 epoch 59 - iter 75/155 - loss 0.43373202 - time (sec): 0.86 - samples/sec: 10029.64 - lr: 0.000195
2023-06-16 13:26:52,014 epoch 59 - iter 90/155 - loss 0.43182194 - time (sec): 1.02 - samples/sec: 10146.85 - lr: 0.000195

100%|██████████| 20/20 [00:00<00:00, 121.28it/s]

2023-06-16 13:26:52,902 Evaluating as a multi-label problem: False





2023-06-16 13:26:52,929 DEV : loss 0.4168710708618164 - f1-score (micro avg)  0.8186
2023-06-16 13:26:52,944 BAD EPOCHS (no improvement): 3
2023-06-16 13:26:52,947 ----------------------------------------------------------------------------------------------------
2023-06-16 13:26:53,115 epoch 60 - iter 15/155 - loss 0.46650109 - time (sec): 0.17 - samples/sec: 10089.70 - lr: 0.000195
2023-06-16 13:26:53,286 epoch 60 - iter 30/155 - loss 0.44300178 - time (sec): 0.34 - samples/sec: 10091.79 - lr: 0.000195
2023-06-16 13:26:53,452 epoch 60 - iter 45/155 - loss 0.42585516 - time (sec): 0.50 - samples/sec: 10167.59 - lr: 0.000195
2023-06-16 13:26:53,614 epoch 60 - iter 60/155 - loss 0.42573132 - time (sec): 0.66 - samples/sec: 10236.85 - lr: 0.000195
2023-06-16 13:26:53,780 epoch 60 - iter 75/155 - loss 0.42352285 - time (sec): 0.83 - samples/sec: 10367.60 - lr: 0.000195
2023-06-16 13:26:53,946 epoch 60 - iter 90/155 - loss 0.42053850 - time (sec): 1.00 - samples/sec: 10434.98 - lr: 0.0001

100%|██████████| 20/20 [00:00<00:00, 118.81it/s]

2023-06-16 13:26:54,854 Evaluating as a multi-label problem: False





2023-06-16 13:26:54,883 DEV : loss 0.4168577492237091 - f1-score (micro avg)  0.8191
2023-06-16 13:26:54,901 Epoch    60: reducing learning rate of group 0 to 9.7656e-05.
2023-06-16 13:26:54,903 BAD EPOCHS (no improvement): 4
2023-06-16 13:26:54,909 ----------------------------------------------------------------------------------------------------
2023-06-16 13:26:54,910 ----------------------------------------------------------------------------------------------------
2023-06-16 13:26:54,913 learning rate too small - quitting training!
2023-06-16 13:26:54,915 ----------------------------------------------------------------------------------------------------
2023-06-16 13:26:58,735 ----------------------------------------------------------------------------------------------------
2023-06-16 13:27:06,331 SequenceTagger predicts: Dictionary with 2 tags: 0, 1


100%|██████████| 20/20 [00:02<00:00,  8.00it/s]

2023-06-16 13:27:09,223 Evaluating as a multi-label problem: False





2023-06-16 13:27:09,254 0.8107	0.8107	0.8107	0.8107
2023-06-16 13:27:09,257 
Results:
- F-score (micro) 0.8107
- F-score (macro) 0.5482
- Accuracy 0.8107

By class:
              precision    recall  f1-score   support

           0     0.8093    0.9949    0.8926      1762
           1     0.8571    0.1156    0.2038       467

    accuracy                         0.8107      2229
   macro avg     0.8332    0.5553    0.5482      2229
weighted avg     0.8193    0.8107    0.7483      2229

2023-06-16 13:27:09,260 ----------------------------------------------------------------------------------------------------


{'test_score': 0.8106774338268282,
 'dev_score_history': [0.7865219357756671,
  0.7865219357756671,
  0.7865219357756671,
  0.7865219357756671,
  0.7865219357756671,
  0.7865219357756671,
  0.7865219357756671,
  0.7865219357756671,
  0.7865219357756671,
  0.7865219357756671,
  0.7865219357756671,
  0.7919493441881501,
  0.7869742198100407,
  0.8168249660786974,
  0.8091361374943464,
  0.8086838534599728,
  0.8014473089099955,
  0.8086838534599728,
  0.8082315694255993,
  0.8086838534599728,
  0.819538670284939,
  0.8132066938037087,
  0.8186341022161918,
  0.8127544097693351,
  0.8104929895974672,
  0.8154681139755766,
  0.8154681139755766,
  0.8132066938037087,
  0.8141112618724559,
  0.8163726820443238,
  0.8145635459068294,
  0.8163726820443238,
  0.8186341022161918,
  0.8181818181818182,
  0.8186341022161918,
  0.8163726820443238,
  0.8181818181818182,
  0.8186341022161918,
  0.8181818181818182,
  0.8159203980099502,
  0.8159203980099502,
  0.8177295341474446,
  0.8181818181818182,

## Model Inference

In [None]:
# Hand-written German example utterances fed to the tagger in the next cell.
# Several contain a filler word or a repeated/restarted phrase.
sentences = [
    "Suche nach ähm geöffneten Elektronikläden.",
    "Statistik meiner meiner Herzfrequenz aufrufen.",
    "Zahle meine Strom meine Stromrechnung.",
    "Gesundheitsdaten zeige mir die Gesundheitsdaten.",
    "Könnten Sie eine Erinnerungsmail machen.",
]
# Keep the individual names bound as well, in list order.
sent1, sent2, sent3, sent4, sent5 = sentences

In [None]:
from flair.models import SequenceTagger
from flair.data import Sentence

# Restore the tagger from the 'best-model.pt' checkpoint stored under save_dir.
best_checkpoint = save_dir+'/best-model.pt'
model = SequenceTagger.load(best_checkpoint)

# Tag the example utterances one at a time and print each tagged result.
for raw_text in sentences:
  # Wrap the raw string in a flair Sentence so the model can annotate it.
  sentence = Sentence(raw_text)
  # predict() writes the predicted tags onto the Sentence object in place.
  model.predict(sentence)
  print(sentence.to_tagged_string())

2023-06-16 13:27:16,914 SequenceTagger predicts: Dictionary with 2 tags: 0, 1
Sentence[6]: "Suche nach ähm geöffneten Elektronikläden." → ["Suche"/0, "nach"/1, "ähm"/1, "geöffneten"/0, "Elektronikläden"/0, "."/0]
Sentence[6]: "Statistik meiner meiner Herzfrequenz aufrufen." → ["Statistik"/0, "meiner"/0, "meiner"/0, "Herzfrequenz"/0, "aufrufen"/0, "."/0]
Sentence[6]: "Zahle meine Strom meine Stromrechnung." → ["Zahle"/0, "meine"/0, "Strom"/0, "meine"/0, "Stromrechnung"/0, "."/0]
Sentence[6]: "Gesundheitsdaten zeige mir die Gesundheitsdaten." → ["Gesundheitsdaten"/0, "zeige"/0, "mir"/0, "die"/0, "Gesundheitsdaten"/0, "."/0]
Sentence[6]: "Könnten Sie eine Erinnerungsmail machen." → ["Könnten"/0, "Sie"/0, "eine"/0, "Erinnerungsmail"/0, "machen"/0, "."/0]
