# 模型逻辑

## 损失函数和网络模型设计

　　这是一个分类问题，新闻总共有１４类，每条新闻数据最后需要归属到某一类新闻，虽然评价是以F１＿ｓｃｏｒｅ来进行排名，但是损失函数还是用CrossEntropyloss来计算，方便训练。  
  
　　损失函数的输入应该是一个ｌｉｎｅａｒ＋ｓｏｆｔｍａｘ的结果，而输入给ｌｉｎｅａｒ的应该是循环神经网络的最终输出的最后一个隐藏状态的结果。

## 数据集和DataLoader
　　每个训练数据包含ｔｅｘｔ和ｌａｂｅｌ两个数据。其中ｔｅｘｔ的原始数据字符串，字符串的内容为用空格隔开的语料库索引编号，现有索引编号为０－７５４９，数据中最短的ｔｅｘｔ只包含两个文字，最长的ｔｅｘｔ包含５７９２１个文字。初步思路是，先腾出索引编号‘0’给空白，然后将所有数据的text都用‘0’补齐成57921长的数组，然后将给Dataloader输出tensor，传给模型。

## 模型设计
　　模型设计为Embedding（7550，300）＋GRU（300，64）＋Linear（64，14）＋Softmax。

# 模型实现

## 数据集实现

In [31]:
import torch
import torch.nn as nn
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
import numpy as np
import pandas as pd
from tqdm import tqdm
import os

class MyDataset(Dataset):
    def __init__(self,csv_path):
        csv_data = pd.read_csv(csv_path,sep='\t')    
        self.text_data = csv_data.text[:4000]
        self.label_data = csv_data.label[:4000]
        
    def __getitem__(self,index):
        #所有text内的token索引增加1，0空出来代表空格，将每个text补充成57921长
        text_str = self.text_data[index]
        text_list = [int(x)+1 for x in text_str.split()]
        text_list.extend([0]*(57921-len(text_list)))
        text_array = np.array(text_list)
        label_int = self.label_data[index]
        return text_array,label_int
        
    def __len__(self):
        return len(self.text_data)

class MyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.embedding = nn.Embedding(7550,100)
        self.rnn = nn.GRU(100,50,batch_first=True)
        self.fc = nn.Linear(50,14)
        
    def forward(self,X):
#         print('X:',X.shape)
        X = self.embedding(X)
#         print('embedding:',X.shape)
        _,X = self.rnn(X)
#         print('rnn:',X.shape)
        X = X.squeeze(dim=0)
#         print('squeeze:',X.shape)
        y_hat = self.fc(X)
#         print('y_hat:',y_hat.shape)
        return y_hat

    
class MyTrain():
    def __init__(self,max_epoch=1,random_seed=1,lr=0.001,out_dir='./'):
        self.max_epoch = max_epoch
        self.random_seed = random_seed
        self.lr = lr
        self.out_dir = out_dir
        self.iter = 0
        
    def fix_random(self):
        import random
        import numpy as np
        import torch
        random.seed(self.random_seed)
        np.random.seed(self.random_seed)
        torch.random.manual_seed(self.random_seed)
        torch.cuda.random.manual_seed_all(self.random_seed)
        torch.backends.cudnn.deterministic = True
        print(f'random seed:{self.random_seed}')
        
    def my_train(self):
        
        max_epoch,lr = self.max_epoch,self.lr
        if self.random_seed is not None:
            self.fix_random()
        my_dataset = MyDataset('./train_set.csv')
        my_model = MyModel()
        my_model.train()          #将模型设置为训练模式
        my_optim = torch.optim.Adam(my_model.parameters(),lr=lr)
        my_loss = nn.CrossEntropyLoss()
        if torch.cuda.is_available():
            my_model.cuda()
            my_loss.cuda()
        print(f'train device:{next(iter(my_model.parameters())).device}')  #显示训练设备
        
        for epoch_index in range(max_epoch):
            loss_list = []
            f1_score_list = []
            best_f1_score = 0
            my_dataloader = DataLoader(my_dataset,batch_size=4,shuffle=True)
            my_dataloader = tqdm(my_dataloader)
            for batch_index,batch_data in enumerate(my_dataloader):
                batch_text,batch_label = batch_data
#                 print('batch_text:',batch_text.shape)
#                 print('batch_label:',batch_label)
                if torch.cuda.is_available():
                    batch_text = batch_text.cuda()
                    batch_label = batch_label.cuda()
                    
                y_hat = my_model(batch_text)
                batch_loss = my_loss(y_hat,batch_label)
                
                my_optim.zero_grad()
                batch_loss.backward()
#                 for i in my_model.parameters():
#                     print('grad:',i.grad)
                my_optim.step()
                my_optim.param_groups[0]['lr'] = lr*(0.8**((self.iter//1000)%10))
                
                #显示batch结果
                batch_lr = round(my_optim.param_groups[0]['lr'],5)
                batch_loss = round(batch_loss.item(),4)
                loss_list.append(batch_loss)
                mean_loss = round((sum(loss_list)/len(loss_list)),3)

                batch_f1_score = self.f1_score(y_hat.data,batch_label.data)
                f1_score_list.append(batch_f1_score)
                mean_f1 = round(sum(f1_score_list)/len(f1_score_list),3)
                my_dataloader.set_description(f'epoch:{epoch_index},batch:{batch_index},lr:{batch_lr},loss:{batch_loss},mean_loss:{mean_loss},mean_f1:{mean_f1}')
                
                #存储模型
                torch.save(my_model.state_dict(),os.path.join(self.out_dir,'embedding_gru_last'))                
                if batch_f1_score>best_f1_score:
                    torch.save(my_model.state_dict(),os.path.join(self.out_dir,'embedding_gru_best'))
                    best_f1_score = batch_f1_score
                    
                self.iter += 1
                
    def f1_score(self,y_hat,label,eps=1e-8):
        #y_hat(N,C),label(1)
        y_hat = y_hat.cpu()
        label = label.cpu()
        preds_list = list(torch.argmax(y_hat,dim=1).numpy())
        label_list = list(label.numpy())
        print(f'preds:{preds_list},label:{label_list}')
        class_index_list = []
        for class_index in label_list:
            if class_index not in class_index_list:
                class_index_list.append(class_index)

        f1_score_list = []
        for index in class_index_list:
            if index not in preds_list:
                sub_f1_score = 0
            else:
                tp = 0
                fp = 0
                fn = 0
                for i in range(len(preds_list)):
                    if preds_list[i] == index and label_list[i] == index:
                        tp+=1
                    if preds_list[i] == index and label_list[i] != index: 
                        fp+=1
                    if preds_list[i] != index and label_list[i] == index:
                        fn+=1
                prec_val = tp/(tp+fp) 
                recall_val = tp/(tp+fn)
                sub_f1_score = 2*(prec_val*recall_val)/(prec_val+recall_val+eps)
            f1_score_list.append(sub_f1_score)

        batch_f1_score = sum(f1_score_list)/len(f1_score_list)

        return batch_f1_score
        
        
        
#     #验证所有的验证集数据
#     def my_valid(self,valid_dataloader,model,criterion):

#         #将模型设置为计算模型
#         model.eval()

#         total_loss = 0
#         with torch.no_grad():
#             for batch_index,valid_data in enumerate(valid_dataloader):
#                 batch_feat,batch_label = valid_data
#                 y_hat = model(batch_feat)
#                 loss = criterion(y_hat,batch_label)
#                 total_loss+=loss
#             total_loss/=(batch_index+1)

#         return total_loss

In [32]:
if __name__ == '__main__':
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    MyTrain(max_epoch=1,random_seed=None).my_train()

train device:cuda:0


epoch:0,batch:0,lr:0.001,loss:2.9985,mean_loss:2.998,mean_f1:0.0:   0%|               | 1/1000 [00:00<06:37,  2.51it/s]

preds:[3, 3, 3, 3],label:[0, 13, 1, 11]


epoch:0,batch:1,lr:0.001,loss:2.6335,mean_loss:2.816,mean_f1:0.05:   0%|              | 2/1000 [00:00<05:30,  3.02it/s]

preds:[3, 3, 3, 3],label:[7, 5, 3, 0]


epoch:0,batch:2,lr:0.001,loss:2.617,mean_loss:2.75,mean_f1:0.033:   0%|               | 3/1000 [00:00<04:52,  3.41it/s]

preds:[3, 3, 3, 3],label:[0, 8, 4, 8]


epoch:0,batch:3,lr:0.001,loss:2.6465,mean_loss:2.724,mean_f1:0.025:   0%|             | 4/1000 [00:01<04:37,  3.59it/s]

preds:[3, 3, 3, 3],label:[0, 1, 1, 1]


epoch:0,batch:4,lr:0.001,loss:2.749,mean_loss:2.729,mean_f1:0.02:   0%|               | 5/1000 [00:01<04:28,  3.71it/s]

preds:[3, 3, 3, 3],label:[0, 0, 6, 2]


epoch:0,batch:5,lr:0.001,loss:2.4157,mean_loss:2.677,mean_f1:0.033:   1%|             | 6/1000 [00:01<04:21,  3.80it/s]

preds:[3, 3, 3, 3],label:[3, 7, 1, 4]


epoch:0,batch:6,lr:0.001,loss:2.656,mean_loss:2.674,mean_f1:0.029:   1%|              | 7/1000 [00:01<04:22,  3.79it/s]

preds:[3, 3, 3, 3],label:[7, 2, 6, 1]


epoch:0,batch:7,lr:0.001,loss:2.6749,mean_loss:2.674,mean_f1:0.025:   1%|             | 8/1000 [00:02<04:21,  3.80it/s]

preds:[3, 3, 3, 3],label:[4, 9, 2, 8]


epoch:0,batch:8,lr:0.001,loss:2.5024,mean_loss:2.655,mean_f1:0.033:   1%|             | 9/1000 [00:02<04:19,  3.82it/s]

preds:[3, 3, 3, 3],label:[2, 5, 3, 0]


epoch:0,batch:9,lr:0.001,loss:2.3936,mean_loss:2.629,mean_f1:0.04:   1%|▏            | 10/1000 [00:02<04:15,  3.87it/s]

preds:[1, 1, 1, 1],label:[4, 1, 2, 8]


epoch:0,batch:10,lr:0.001,loss:2.8226,mean_loss:2.646,mean_f1:0.036:   1%|           | 11/1000 [00:03<04:26,  3.71it/s]

preds:[1, 1, 1, 1],label:[7, 9, 0, 9]


epoch:0,batch:11,lr:0.001,loss:2.2941,mean_loss:2.617,mean_f1:0.044:   1%|▏          | 12/1000 [00:03<04:25,  3.72it/s]

preds:[1, 1, 1, 1],label:[1, 0, 8, 8]


epoch:0,batch:12,lr:0.001,loss:2.619,mean_loss:2.617,mean_f1:0.049:   1%|▏           | 13/1000 [00:03<04:19,  3.81it/s]

preds:[1, 1, 1, 1],label:[5, 2, 9, 1]


epoch:0,batch:13,lr:0.001,loss:2.4016,mean_loss:2.602,mean_f1:0.045:   1%|▏          | 14/1000 [00:03<04:16,  3.84it/s]

preds:[1, 1, 1, 1],label:[7, 3, 0, 3]


epoch:0,batch:14,lr:0.001,loss:2.361,mean_loss:2.586,mean_f1:0.042:   2%|▏           | 15/1000 [00:04<04:18,  3.82it/s]

preds:[1, 1, 1, 1],label:[4, 0, 2, 4]


epoch:0,batch:15,lr:0.001,loss:2.4004,mean_loss:2.574,mean_f1:0.04:   2%|▏           | 16/1000 [00:04<04:16,  3.84it/s]

preds:[1, 1, 1, 1],label:[0, 5, 2, 2]


epoch:0,batch:16,lr:0.001,loss:2.462,mean_loss:2.568,mean_f1:0.045:   2%|▏           | 17/1000 [00:04<04:14,  3.86it/s]

preds:[1, 1, 1, 1],label:[5, 1, 3, 3]


epoch:0,batch:17,lr:0.001,loss:2.1854,mean_loss:2.546,mean_f1:0.05:   2%|▏           | 18/1000 [00:04<04:13,  3.88it/s]

preds:[1, 1, 1, 1],label:[4, 2, 2, 1]


epoch:0,batch:18,lr:0.001,loss:2.1854,mean_loss:2.527,mean_f1:0.047:   2%|▏          | 19/1000 [00:05<04:09,  3.93it/s]

preds:[1, 1, 1, 1],label:[2, 0, 3, 2]


epoch:0,batch:19,lr:0.001,loss:2.0099,mean_loss:2.501,mean_f1:0.056:   2%|▏          | 20/1000 [00:05<04:12,  3.88it/s]

preds:[1, 1, 1, 1],label:[0, 1, 2, 1]


epoch:0,batch:20,lr:0.001,loss:2.5481,mean_loss:2.504,mean_f1:0.053:   2%|▏          | 21/1000 [00:05<04:11,  3.89it/s]

preds:[1, 1, 1, 1],label:[4, 6, 0, 4]


epoch:0,batch:21,lr:0.001,loss:2.7851,mean_loss:2.516,mean_f1:0.051:   2%|▏          | 22/1000 [00:05<04:11,  3.89it/s]

preds:[2, 2, 2, 2],label:[9, 4, 12, 1]


epoch:0,batch:22,lr:0.001,loss:2.5419,mean_loss:2.518,mean_f1:0.049:   2%|▎          | 23/1000 [00:06<04:15,  3.82it/s]

preds:[2, 2, 2, 2],label:[6, 8, 0, 3]


epoch:0,batch:23,lr:0.001,loss:2.4434,mean_loss:2.514,mean_f1:0.047:   2%|▎          | 24/1000 [00:06<04:14,  3.83it/s]

preds:[2, 2, 2, 2],label:[1, 3, 5, 3]


epoch:0,batch:24,lr:0.001,loss:2.4798,mean_loss:2.513,mean_f1:0.049:   2%|▎          | 25/1000 [00:06<04:19,  3.75it/s]

preds:[1, 1, 1, 1],label:[5, 1, 0, 6]


epoch:0,batch:25,lr:0.001,loss:2.1652,mean_loss:2.5,mean_f1:0.047:   3%|▎            | 26/1000 [00:06<04:15,  3.81it/s]

preds:[1, 1, 1, 1],label:[4, 3, 2, 0]


epoch:0,batch:26,lr:0.001,loss:2.2077,mean_loss:2.489,mean_f1:0.045:   3%|▎          | 27/1000 [00:07<04:12,  3.85it/s]

preds:[1, 1, 1, 1],label:[0, 3, 0, 3]


epoch:0,batch:27,lr:0.001,loss:1.8823,mean_loss:2.467,mean_f1:0.048:   3%|▎          | 28/1000 [00:07<04:07,  3.93it/s]

preds:[1, 1, 1, 1],label:[1, 0, 2, 2]


epoch:0,batch:28,lr:0.001,loss:1.8644,mean_loss:2.446,mean_f1:0.058:   3%|▎          | 29/1000 [00:07<04:09,  3.90it/s]

preds:[1, 1, 1, 1],label:[1, 0, 1, 0]


epoch:0,batch:29,lr:0.001,loss:2.0977,mean_loss:2.435,mean_f1:0.056:   3%|▎          | 30/1000 [00:07<04:05,  3.94it/s]

preds:[1, 1, 1, 1],label:[4, 0, 2, 4]


epoch:0,batch:30,lr:0.001,loss:2.6394,mean_loss:2.441,mean_f1:0.058:   3%|▎          | 31/1000 [00:08<04:07,  3.92it/s]

preds:[1, 1, 1, 1],label:[1, 6, 3, 7]


epoch:0,batch:31,lr:0.001,loss:1.8169,mean_loss:2.422,mean_f1:0.06:   3%|▍           | 32/1000 [00:08<04:03,  3.98it/s]

preds:[1, 1, 1, 1],label:[2, 1, 0, 0]


epoch:0,batch:32,lr:0.001,loss:2.1249,mean_loss:2.413,mean_f1:0.062:   3%|▎          | 33/1000 [00:08<04:05,  3.94it/s]

preds:[1, 1, 1, 1],label:[2, 5, 1, 2]


epoch:0,batch:33,lr:0.001,loss:3.0754,mean_loss:2.432,mean_f1:0.06:   3%|▍           | 34/1000 [00:08<04:04,  3.95it/s]

preds:[1, 1, 1, 1],label:[6, 0, 11, 9]


epoch:0,batch:34,lr:0.001,loss:2.0914,mean_loss:2.423,mean_f1:0.059:   4%|▍          | 35/1000 [00:09<04:02,  3.98it/s]

preds:[0, 0, 0, 0],label:[8, 1, 2, 2]


epoch:0,batch:35,lr:0.001,loss:2.298,mean_loss:2.419,mean_f1:0.06:   4%|▍            | 36/1000 [00:09<04:00,  4.02it/s]

preds:[0, 0, 0, 0],label:[0, 2, 13, 1]


epoch:0,batch:36,lr:0.001,loss:2.0654,mean_loss:2.41,mean_f1:0.058:   4%|▍           | 37/1000 [00:09<04:00,  4.01it/s]

preds:[0, 0, 0, 0],label:[1, 2, 4, 3]


epoch:0,batch:37,lr:0.001,loss:2.0355,mean_loss:2.4,mean_f1:0.059:   4%|▍            | 38/1000 [00:09<04:19,  3.70it/s]

preds:[0, 0, 0, 0],label:[0, 1, 6, 2]


epoch:0,batch:38,lr:0.001,loss:2.1947,mean_loss:2.395,mean_f1:0.064:   4%|▍          | 39/1000 [00:10<04:15,  3.76it/s]

preds:[0, 0, 0, 0],label:[0, 1, 0, 7]


epoch:0,batch:39,lr:0.001,loss:2.2014,mean_loss:2.39,mean_f1:0.062:   4%|▍           | 40/1000 [00:10<04:12,  3.80it/s]

preds:[0, 0, 0, 0],label:[5, 4, 1, 1]


epoch:0,batch:40,lr:0.001,loss:1.6725,mean_loss:2.372,mean_f1:0.066:   4%|▍          | 41/1000 [00:10<04:10,  3.82it/s]

preds:[0, 0, 0, 0],label:[0, 2, 1, 0]


epoch:0,batch:41,lr:0.001,loss:2.5922,mean_loss:2.377,mean_f1:0.064:   4%|▍          | 42/1000 [00:11<04:08,  3.86it/s]

preds:[0, 0, 0, 0],label:[1, 7, 3, 3]


epoch:0,batch:42,lr:0.001,loss:1.6922,mean_loss:2.361,mean_f1:0.063:   4%|▍          | 43/1000 [00:11<04:07,  3.86it/s]

preds:[1, 1, 1, 1],label:[0, 2, 2, 0]


epoch:0,batch:43,lr:0.001,loss:2.18,mean_loss:2.357,mean_f1:0.061:   4%|▌            | 44/1000 [00:11<04:08,  3.85it/s]

preds:[0, 0, 0, 0],label:[5, 1, 1, 4]


epoch:0,batch:44,lr:0.001,loss:1.8041,mean_loss:2.345,mean_f1:0.063:   4%|▍          | 45/1000 [00:11<04:08,  3.84it/s]

preds:[1, 1, 1, 1],label:[0, 1, 3, 0]


epoch:0,batch:45,lr:0.001,loss:2.3108,mean_loss:2.344,mean_f1:0.066:   5%|▌          | 46/1000 [00:12<04:05,  3.89it/s]

preds:[1, 1, 1, 1],label:[1, 6, 8, 1]


epoch:0,batch:46,lr:0.001,loss:2.751,mean_loss:2.353,mean_f1:0.065:   5%|▌           | 47/1000 [00:12<04:05,  3.88it/s]

preds:[1, 1, 1, 1],label:[0, 10, 5, 0]


epoch:0,batch:47,lr:0.001,loss:2.5641,mean_loss:2.357,mean_f1:0.068:   5%|▌          | 48/1000 [00:12<04:08,  3.83it/s]

preds:[1, 1, 1, 1],label:[1, 5, 11, 1]


epoch:0,batch:48,lr:0.001,loss:2.0231,mean_loss:2.351,mean_f1:0.07:   5%|▌           | 49/1000 [00:12<04:08,  3.83it/s]

preds:[1, 1, 1, 1],label:[0, 4, 1, 4]


epoch:0,batch:49,lr:0.001,loss:2.6112,mean_loss:2.356,mean_f1:0.071:   5%|▌          | 50/1000 [00:13<04:01,  3.94it/s]

preds:[1, 1, 1, 1],label:[7, 3, 3, 1]


epoch:0,batch:50,lr:0.001,loss:1.8618,mean_loss:2.346,mean_f1:0.072:   5%|▌          | 51/1000 [00:13<03:58,  3.97it/s]

preds:[1, 1, 1, 1],label:[1, 5, 0, 0]


epoch:0,batch:51,lr:0.001,loss:2.8434,mean_loss:2.356,mean_f1:0.075:   5%|▌          | 52/1000 [00:13<04:01,  3.93it/s]

preds:[1, 1, 1, 1],label:[1, 1, 10, 7]


epoch:0,batch:52,lr:0.001,loss:2.4342,mean_loss:2.357,mean_f1:0.075:   5%|▌          | 53/1000 [00:13<03:58,  3.97it/s]

preds:[1, 1, 1, 1],label:[2, 1, 7, 4]


epoch:0,batch:53,lr:0.001,loss:2.5497,mean_loss:2.361,mean_f1:0.074:   5%|▌          | 54/1000 [00:14<03:59,  3.95it/s]

preds:[1, 1, 1, 1],label:[0, 8, 7, 0]


epoch:0,batch:54,lr:0.001,loss:1.9044,mean_loss:2.352,mean_f1:0.077:   6%|▌          | 55/1000 [00:14<04:00,  3.94it/s]

preds:[1, 1, 1, 1],label:[1, 2, 1, 5]


epoch:0,batch:55,lr:0.001,loss:2.8794,mean_loss:2.362,mean_f1:0.077:   6%|▌          | 56/1000 [00:14<04:00,  3.93it/s]

preds:[1, 1, 1, 1],label:[11, 1, 8, 3]


epoch:0,batch:56,lr:0.001,loss:2.7034,mean_loss:2.368,mean_f1:0.078:   6%|▋          | 57/1000 [00:14<03:56,  3.99it/s]

preds:[1, 1, 1, 1],label:[3, 1, 8, 7]


epoch:0,batch:57,lr:0.001,loss:2.0755,mean_loss:2.363,mean_f1:0.08:   6%|▋           | 58/1000 [00:15<03:56,  3.99it/s]

preds:[1, 1, 1, 1],label:[1, 1, 8, 4]


epoch:0,batch:58,lr:0.001,loss:2.9941,mean_loss:2.373,mean_f1:0.079:   6%|▋          | 59/1000 [00:15<03:58,  3.94it/s]

preds:[1, 1, 1, 1],label:[12, 2, 0, 9]


epoch:0,batch:59,lr:0.001,loss:2.4132,mean_loss:2.374,mean_f1:0.077:   6%|▋          | 60/1000 [00:15<04:07,  3.79it/s]

preds:[1, 1, 1, 1],label:[5, 0, 5, 2]


epoch:0,batch:60,lr:0.001,loss:3.8534,mean_loss:2.398,mean_f1:0.076:   6%|▋          | 61/1000 [00:15<04:02,  3.87it/s]

preds:[1, 1, 1, 1],label:[11, 2, 12, 10]


epoch:0,batch:61,lr:0.001,loss:3.249,mean_loss:2.412,mean_f1:0.075:   6%|▋           | 62/1000 [00:16<04:01,  3.88it/s]

preds:[1, 1, 1, 1],label:[10, 2, 7, 5]


epoch:0,batch:62,lr:0.001,loss:1.6749,mean_loss:2.4,mean_f1:0.077:   6%|▊            | 63/1000 [00:16<04:01,  3.88it/s]

preds:[1, 1, 1, 1],label:[1, 1, 3, 0]


epoch:0,batch:63,lr:0.001,loss:2.1319,mean_loss:2.396,mean_f1:0.078:   6%|▋          | 64/1000 [00:16<03:59,  3.90it/s]

preds:[1, 1, 1, 1],label:[1, 4, 3, 2]


epoch:0,batch:64,lr:0.001,loss:2.1189,mean_loss:2.392,mean_f1:0.078:   6%|▋          | 65/1000 [00:16<03:58,  3.93it/s]

preds:[1, 1, 1, 1],label:[3, 1, 3, 2]


epoch:0,batch:65,lr:0.001,loss:1.8788,mean_loss:2.384,mean_f1:0.081:   7%|▋          | 66/1000 [00:17<03:59,  3.90it/s]

preds:[1, 1, 1, 1],label:[1, 0, 1, 7]


epoch:0,batch:66,lr:0.001,loss:2.3927,mean_loss:2.384,mean_f1:0.079:   7%|▋          | 67/1000 [00:17<03:56,  3.95it/s]

preds:[1, 1, 1, 1],label:[4, 3, 2, 3]


epoch:0,batch:67,lr:0.001,loss:2.3471,mean_loss:2.384,mean_f1:0.08:   7%|▊           | 68/1000 [00:17<03:56,  3.94it/s]

preds:[1, 1, 1, 1],label:[1, 8, 0, 8]


epoch:0,batch:68,lr:0.001,loss:2.0251,mean_loss:2.379,mean_f1:0.082:   7%|▊          | 69/1000 [00:17<03:56,  3.94it/s]

preds:[1, 1, 1, 1],label:[3, 1, 8, 1]


epoch:0,batch:69,lr:0.001,loss:2.1344,mean_loss:2.375,mean_f1:0.081:   7%|▊          | 70/1000 [00:18<03:53,  3.98it/s]

preds:[1, 1, 1, 1],label:[0, 3, 0, 4]


epoch:0,batch:70,lr:0.001,loss:2.0238,mean_loss:2.37,mean_f1:0.08:   7%|▉            | 71/1000 [00:18<03:57,  3.91it/s]

preds:[1, 1, 1, 1],label:[0, 3, 2, 0]


epoch:0,batch:71,lr:0.001,loss:2.478,mean_loss:2.372,mean_f1:0.079:   7%|▊           | 72/1000 [00:18<03:54,  3.95it/s]

preds:[1, 1, 1, 1],label:[2, 3, 9, 0]


epoch:0,batch:72,lr:0.001,loss:2.0119,mean_loss:2.367,mean_f1:0.08:   7%|▉           | 73/1000 [00:18<03:56,  3.92it/s]

preds:[1, 1, 1, 1],label:[1, 7, 0, 0]


epoch:0,batch:73,lr:0.001,loss:2.014,mean_loss:2.362,mean_f1:0.08:   7%|▉            | 74/1000 [00:19<03:54,  3.96it/s]

preds:[1, 1, 1, 1],label:[8, 0, 0, 1]


epoch:0,batch:74,lr:0.001,loss:2.4536,mean_loss:2.363,mean_f1:0.081:   8%|▊          | 75/1000 [00:19<03:55,  3.94it/s]

preds:[1, 1, 1, 1],label:[11, 1, 2, 2]


epoch:0,batch:75,lr:0.001,loss:2.9337,mean_loss:2.371,mean_f1:0.08:   8%|▉           | 76/1000 [00:19<03:53,  3.95it/s]

preds:[1, 1, 1, 1],label:[0, 5, 10, 8]


epoch:0,batch:76,lr:0.001,loss:2.1632,mean_loss:2.368,mean_f1:0.081:   8%|▊          | 77/1000 [00:19<03:49,  4.01it/s]

preds:[1, 1, 1, 1],label:[1, 7, 2, 2]


epoch:0,batch:77,lr:0.001,loss:2.205,mean_loss:2.366,mean_f1:0.081:   8%|▉           | 78/1000 [00:20<03:50,  4.00it/s]

preds:[1, 1, 1, 1],label:[5, 4, 0, 1]


epoch:0,batch:78,lr:0.001,loss:2.3604,mean_loss:2.366,mean_f1:0.08:   8%|▉           | 79/1000 [00:20<03:50,  3.99it/s]

preds:[0, 0, 0, 0],label:[5, 3, 1, 5]


epoch:0,batch:79,lr:0.001,loss:1.9646,mean_loss:2.361,mean_f1:0.081:   8%|▉          | 80/1000 [00:20<03:49,  4.01it/s]

preds:[0, 0, 0, 0],label:[0, 2, 2, 3]


epoch:0,batch:80,lr:0.001,loss:2.1437,mean_loss:2.358,mean_f1:0.081:   8%|▉          | 81/1000 [00:20<03:49,  4.01it/s]

preds:[0, 0, 0, 0],label:[2, 2, 0, 8]


epoch:0,batch:81,lr:0.001,loss:3.1278,mean_loss:2.367,mean_f1:0.08:   8%|▉           | 82/1000 [00:21<03:49,  4.00it/s]

preds:[0, 0, 0, 0],label:[5, 5, 7, 10]


epoch:0,batch:82,lr:0.001,loss:1.9592,mean_loss:2.363,mean_f1:0.079:   8%|▉          | 83/1000 [00:21<03:49,  3.99it/s]

preds:[0, 0, 0, 0],label:[1, 3, 2, 2]


epoch:0,batch:83,lr:0.001,loss:1.7873,mean_loss:2.356,mean_f1:0.081:   8%|▉          | 84/1000 [00:21<03:51,  3.96it/s]

preds:[0, 0, 0, 0],label:[3, 0, 0, 1]


epoch:0,batch:84,lr:0.001,loss:2.2508,mean_loss:2.354,mean_f1:0.081:   8%|▉          | 85/1000 [00:21<04:02,  3.78it/s]

preds:[0, 0, 0, 0],label:[2, 0, 4, 5]


epoch:0,batch:85,lr:0.001,loss:2.1577,mean_loss:2.352,mean_f1:0.08:   9%|█           | 86/1000 [00:22<04:04,  3.74it/s]

preds:[0, 0, 0, 0],label:[1, 3, 1, 4]


epoch:0,batch:86,lr:0.001,loss:2.3335,mean_loss:2.352,mean_f1:0.081:   9%|▉          | 87/1000 [00:22<03:57,  3.84it/s]

preds:[0, 0, 0, 0],label:[0, 4, 3, 4]


epoch:0,batch:87,lr:0.001,loss:2.0992,mean_loss:2.349,mean_f1:0.08:   9%|█           | 88/1000 [00:22<03:53,  3.91it/s]

preds:[0, 0, 0, 0],label:[1, 5, 1, 3]


epoch:0,batch:88,lr:0.001,loss:1.9798,mean_loss:2.345,mean_f1:0.08:   9%|█           | 89/1000 [00:23<03:54,  3.89it/s]

preds:[0, 0, 0, 0],label:[1, 5, 2, 0]


epoch:0,batch:89,lr:0.001,loss:1.9133,mean_loss:2.34,mean_f1:0.081:   9%|█           | 90/1000 [00:23<03:51,  3.93it/s]

preds:[0, 0, 0, 0],label:[3, 3, 0, 1]


epoch:0,batch:90,lr:0.001,loss:2.639,mean_loss:2.343,mean_f1:0.08:   9%|█▏           | 91/1000 [00:23<03:51,  3.93it/s]

preds:[0, 0, 0, 0],label:[2, 3, 7, 6]


epoch:0,batch:91,lr:0.001,loss:2.4404,mean_loss:2.344,mean_f1:0.08:   9%|█           | 92/1000 [00:23<03:55,  3.86it/s]

preds:[0, 0, 0, 0],label:[0, 5, 1, 6]


epoch:0,batch:92,lr:0.001,loss:1.8009,mean_loss:2.339,mean_f1:0.081:   9%|█          | 93/1000 [00:24<03:59,  3.79it/s]

preds:[0, 0, 0, 0],label:[0, 1, 2, 1]


epoch:0,batch:93,lr:0.001,loss:2.3583,mean_loss:2.339,mean_f1:0.082:   9%|█          | 94/1000 [00:24<03:55,  3.85it/s]

preds:[0, 0, 0, 0],label:[0, 10, 0, 1]


epoch:0,batch:94,lr:0.001,loss:2.1234,mean_loss:2.337,mean_f1:0.081:  10%|█          | 95/1000 [00:24<03:54,  3.86it/s]

preds:[0, 0, 0, 0],label:[7, 3, 1, 1]


epoch:0,batch:95,lr:0.001,loss:1.9655,mean_loss:2.333,mean_f1:0.081:  10%|█          | 96/1000 [00:24<03:51,  3.90it/s]

preds:[1, 1, 1, 1],label:[0, 1, 3, 5]


epoch:0,batch:96,lr:0.001,loss:2.2534,mean_loss:2.332,mean_f1:0.082:  10%|█          | 97/1000 [00:25<03:57,  3.81it/s]

preds:[1, 1, 1, 1],label:[8, 5, 2, 1]


epoch:0,batch:97,lr:0.001,loss:1.8221,mean_loss:2.327,mean_f1:0.082:  10%|█          | 98/1000 [00:25<04:00,  3.74it/s]

preds:[1, 1, 1, 1],label:[1, 3, 0, 2]


epoch:0,batch:98,lr:0.001,loss:2.0456,mean_loss:2.324,mean_f1:0.081:  10%|█          | 99/1000 [00:25<04:01,  3.74it/s]

preds:[1, 1, 1, 1],label:[0, 2, 0, 4]


epoch:0,batch:99,lr:0.001,loss:2.0872,mean_loss:2.321,mean_f1:0.082:  10%|█         | 100/1000 [00:25<03:59,  3.76it/s]

preds:[1, 1, 1, 1],label:[1, 1, 5, 4]


epoch:0,batch:100,lr:0.001,loss:1.8272,mean_loss:2.317,mean_f1:0.083:  10%|▉        | 101/1000 [00:26<03:57,  3.79it/s]

preds:[1, 1, 1, 1],label:[3, 3, 0, 1]


epoch:0,batch:101,lr:0.001,loss:2.9583,mean_loss:2.323,mean_f1:0.083:  10%|▉        | 102/1000 [00:26<03:53,  3.85it/s]

preds:[1, 1, 1, 1],label:[8, 1, 10, 8]


epoch:0,batch:102,lr:0.001,loss:2.9509,mean_loss:2.329,mean_f1:0.084:  10%|▉        | 103/1000 [00:26<03:50,  3.89it/s]

preds:[1, 1, 1, 1],label:[10, 1, 2, 9]


epoch:0,batch:103,lr:0.001,loss:1.9973,mean_loss:2.326,mean_f1:0.084:  10%|▉        | 104/1000 [00:26<03:48,  3.93it/s]

preds:[1, 1, 1, 1],label:[1, 3, 0, 4]


epoch:0,batch:104,lr:0.001,loss:3.679,mean_loss:2.339,mean_f1:0.083:  10%|█         | 105/1000 [00:27<03:48,  3.91it/s]

preds:[1, 1, 1, 1],label:[6, 6, 10, 7]


epoch:0,batch:105,lr:0.001,loss:2.3797,mean_loss:2.339,mean_f1:0.082:  11%|▉        | 106/1000 [00:27<03:48,  3.91it/s]

preds:[1, 1, 1, 1],label:[0, 3, 2, 6]


epoch:0,batch:106,lr:0.001,loss:2.4697,mean_loss:2.34,mean_f1:0.082:  11%|█         | 107/1000 [00:27<03:47,  3.92it/s]

preds:[1, 1, 1, 1],label:[10, 5, 2, 1]


epoch:0,batch:107,lr:0.001,loss:3.0097,mean_loss:2.346,mean_f1:0.082:  11%|▉        | 108/1000 [00:27<03:45,  3.95it/s]

preds:[1, 1, 1, 1],label:[6, 4, 6, 2]


epoch:0,batch:108,lr:0.001,loss:3.2088,mean_loss:2.354,mean_f1:0.082:  11%|▉        | 109/1000 [00:28<03:45,  3.95it/s]

preds:[1, 1, 1, 1],label:[10, 1, 13, 4]


epoch:0,batch:109,lr:0.001,loss:2.0114,mean_loss:2.351,mean_f1:0.082:  11%|▉        | 110/1000 [00:28<03:52,  3.83it/s]

preds:[1, 1, 1, 1],label:[1, 4, 0, 2]


epoch:0,batch:110,lr:0.001,loss:2.3849,mean_loss:2.352,mean_f1:0.082:  11%|▉        | 111/1000 [00:28<03:53,  3.80it/s]

preds:[1, 1, 1, 1],label:[0, 0, 1, 11]


epoch:0,batch:111,lr:0.001,loss:1.8291,mean_loss:2.347,mean_f1:0.083:  11%|█        | 112/1000 [00:28<03:49,  3.87it/s]

preds:[1, 1, 1, 1],label:[3, 2, 0, 1]


epoch:0,batch:112,lr:0.001,loss:2.3481,mean_loss:2.347,mean_f1:0.083:  11%|█        | 113/1000 [00:29<03:50,  3.86it/s]

preds:[1, 1, 1, 1],label:[4, 3, 7, 1]


epoch:0,batch:113,lr:0.001,loss:1.8453,mean_loss:2.343,mean_f1:0.084:  11%|█        | 114/1000 [00:29<03:49,  3.86it/s]

preds:[1, 1, 1, 1],label:[3, 1, 3, 3]


epoch:0,batch:114,lr:0.001,loss:2.65,mean_loss:2.345,mean_f1:0.083:  12%|█▎         | 115/1000 [00:29<03:48,  3.87it/s]

preds:[1, 1, 1, 1],label:[6, 6, 0, 2]


epoch:0,batch:115,lr:0.001,loss:2.1424,mean_loss:2.343,mean_f1:0.083:  12%|█        | 116/1000 [00:29<03:46,  3.90it/s]

preds:[1, 1, 1, 1],label:[8, 0, 2, 1]


epoch:0,batch:116,lr:0.001,loss:2.4289,mean_loss:2.344,mean_f1:0.082:  12%|█        | 117/1000 [00:30<03:46,  3.91it/s]

preds:[1, 1, 1, 1],label:[4, 6, 3, 3]


epoch:0,batch:117,lr:0.001,loss:2.4603,mean_loss:2.345,mean_f1:0.082:  12%|█        | 118/1000 [00:30<03:45,  3.91it/s]

preds:[1, 1, 1, 1],label:[10, 0, 3, 4]


epoch:0,batch:118,lr:0.001,loss:2.497,mean_loss:2.346,mean_f1:0.081:  12%|█▏        | 119/1000 [00:30<03:45,  3.90it/s]

preds:[1, 1, 1, 1],label:[5, 0, 2, 8]


epoch:0,batch:119,lr:0.001,loss:2.1242,mean_loss:2.345,mean_f1:0.081:  12%|█        | 120/1000 [00:31<03:46,  3.89it/s]

preds:[1, 1, 1, 1],label:[6, 3, 1, 2]


epoch:0,batch:120,lr:0.001,loss:2.621,mean_loss:2.347,mean_f1:0.082:  12%|█▏        | 121/1000 [00:31<03:45,  3.91it/s]

preds:[1, 1, 1, 1],label:[1, 12, 1, 5]


epoch:0,batch:121,lr:0.001,loss:1.986,mean_loss:2.344,mean_f1:0.083:  12%|█▏        | 122/1000 [00:31<03:46,  3.88it/s]

preds:[1, 1, 1, 1],label:[1, 4, 3, 2]


epoch:0,batch:122,lr:0.001,loss:1.871,mean_loss:2.34,mean_f1:0.083:  12%|█▎         | 123/1000 [00:31<03:46,  3.87it/s]

preds:[1, 1, 1, 1],label:[0, 0, 1, 2]


epoch:0,batch:123,lr:0.001,loss:2.3319,mean_loss:2.34,mean_f1:0.083:  12%|█▏        | 124/1000 [00:32<03:45,  3.89it/s]

preds:[1, 1, 1, 1],label:[0, 5, 6, 1]


epoch:0,batch:124,lr:0.001,loss:2.4933,mean_loss:2.341,mean_f1:0.083:  12%|█▏       | 125/1000 [00:32<03:47,  3.85it/s]

preds:[1, 1, 1, 1],label:[1, 0, 2, 11]


epoch:0,batch:125,lr:0.001,loss:2.4657,mean_loss:2.342,mean_f1:0.083:  13%|█▏       | 126/1000 [00:32<03:52,  3.76it/s]

preds:[1, 1, 1, 1],label:[10, 0, 3, 5]


epoch:0,batch:126,lr:0.001,loss:2.052,mean_loss:2.34,mean_f1:0.083:  13%|█▍         | 127/1000 [00:32<03:53,  3.74it/s]

preds:[1, 1, 1, 1],label:[4, 2, 1, 2]


epoch:0,batch:127,lr:0.001,loss:1.7485,mean_loss:2.335,mean_f1:0.084:  13%|█▏       | 128/1000 [00:33<03:49,  3.79it/s]

preds:[1, 1, 1, 1],label:[3, 2, 1, 1]


epoch:0,batch:128,lr:0.001,loss:2.7604,mean_loss:2.339,mean_f1:0.083:  13%|█▏       | 129/1000 [00:33<03:46,  3.85it/s]

preds:[1, 1, 1, 1],label:[7, 2, 7, 2]


epoch:0,batch:129,lr:0.001,loss:2.1586,mean_loss:2.337,mean_f1:0.083:  13%|█▏       | 130/1000 [00:33<03:44,  3.88it/s]

preds:[1, 1, 1, 1],label:[0, 2, 0, 5]


epoch:0,batch:130,lr:0.001,loss:2.068,mean_loss:2.335,mean_f1:0.083:  13%|█▎        | 131/1000 [00:33<03:45,  3.85it/s]

preds:[1, 1, 1, 1],label:[0, 1, 0, 6]


epoch:0,batch:131,lr:0.001,loss:1.8115,mean_loss:2.331,mean_f1:0.084:  13%|█▏       | 132/1000 [00:34<03:43,  3.89it/s]

preds:[1, 1, 1, 1],label:[1, 2, 0, 0]


epoch:0,batch:132,lr:0.001,loss:2.3994,mean_loss:2.332,mean_f1:0.084:  13%|█▏       | 133/1000 [00:34<03:43,  3.88it/s]

preds:[1, 1, 1, 1],label:[0, 2, 9, 1]


epoch:0,batch:133,lr:0.001,loss:2.386,mean_loss:2.332,mean_f1:0.084:  13%|█▎        | 134/1000 [00:34<03:50,  3.76it/s]

preds:[1, 1, 1, 1],label:[1, 5, 0, 8]


epoch:0,batch:134,lr:0.001,loss:2.1449,mean_loss:2.331,mean_f1:0.083:  14%|█▏       | 135/1000 [00:34<03:59,  3.61it/s]

preds:[1, 1, 1, 1],label:[3, 5, 3, 0]


epoch:0,batch:135,lr:0.001,loss:2.0392,mean_loss:2.329,mean_f1:0.083:  14%|█▏       | 136/1000 [00:35<03:52,  3.72it/s]

preds:[1, 1, 1, 1],label:[0, 6, 0, 2]


epoch:0,batch:136,lr:0.001,loss:3.62,mean_loss:2.338,mean_f1:0.082:  14%|█▌         | 137/1000 [00:35<03:45,  3.83it/s]

preds:[0, 0, 0, 0],label:[10, 2, 11, 13]


epoch:0,batch:137,lr:0.001,loss:1.797,mean_loss:2.334,mean_f1:0.082:  14%|█▍        | 138/1000 [00:35<03:42,  3.88it/s]

preds:[0, 0, 0, 0],label:[2, 1, 3, 0]


epoch:0,batch:138,lr:0.001,loss:3.1829,mean_loss:2.34,mean_f1:0.081:  14%|█▍        | 139/1000 [00:35<03:40,  3.90it/s]

preds:[0, 0, 0, 0],label:[6, 9, 5, 4]


epoch:0,batch:139,lr:0.001,loss:2.9047,mean_loss:2.344,mean_f1:0.082:  14%|█▎       | 140/1000 [00:36<03:41,  3.88it/s]

preds:[0, 0, 0, 0],label:[8, 8, 0, 10]


epoch:0,batch:140,lr:0.001,loss:2.5751,mean_loss:2.346,mean_f1:0.082:  14%|█▎       | 141/1000 [00:36<03:41,  3.87it/s]

preds:[0, 0, 0, 0],label:[9, 1, 0, 4]


epoch:0,batch:141,lr:0.001,loss:2.4566,mean_loss:2.347,mean_f1:0.081:  14%|█▎       | 142/1000 [00:36<03:41,  3.88it/s]

preds:[0, 0, 0, 0],label:[1, 5, 7, 1]


epoch:0,batch:142,lr:0.001,loss:2.3607,mean_loss:2.347,mean_f1:0.081:  14%|█▎       | 143/1000 [00:37<03:44,  3.82it/s]

preds:[0, 0, 0, 0],label:[3, 5, 5, 1]


epoch:0,batch:143,lr:0.001,loss:2.031,mean_loss:2.345,mean_f1:0.08:  14%|█▌         | 144/1000 [00:37<03:44,  3.81it/s]

preds:[0, 0, 0, 0],label:[2, 2, 1, 5]


epoch:0,batch:144,lr:0.001,loss:2.0078,mean_loss:2.342,mean_f1:0.081:  14%|█▎       | 145/1000 [00:37<03:47,  3.76it/s]

preds:[0, 0, 0, 0],label:[0, 3, 4, 0]


epoch:0,batch:145,lr:0.001,loss:2.106,mean_loss:2.341,mean_f1:0.082:  15%|█▍        | 146/1000 [00:37<03:47,  3.75it/s]

preds:[0, 0, 0, 0],label:[0, 2, 8, 2]


epoch:0,batch:146,lr:0.001,loss:2.6633,mean_loss:2.343,mean_f1:0.081:  15%|█▎       | 147/1000 [00:38<03:48,  3.74it/s]

preds:[0, 0, 0, 0],label:[6, 1, 9, 2]


epoch:0,batch:147,lr:0.001,loss:3.2166,mean_loss:2.349,mean_f1:0.08:  15%|█▍        | 148/1000 [00:38<03:42,  3.84it/s]

preds:[0, 0, 0, 0],label:[13, 3, 9, 2]


epoch:0,batch:148,lr:0.001,loss:2.0329,mean_loss:2.347,mean_f1:0.081:  15%|█▎       | 149/1000 [00:38<03:37,  3.91it/s]

preds:[0, 0, 0, 0],label:[2, 0, 3, 5]


epoch:0,batch:149,lr:0.001,loss:2.6607,mean_loss:2.349,mean_f1:0.08:  15%|█▌        | 150/1000 [00:38<03:42,  3.81it/s]

preds:[0, 0, 0, 0],label:[4, 7, 5, 1]


epoch:0,batch:150,lr:0.001,loss:2.8353,mean_loss:2.352,mean_f1:0.08:  15%|█▌        | 151/1000 [00:39<03:42,  3.81it/s]

preds:[0, 0, 0, 0],label:[8, 1, 12, 2]


epoch:0,batch:151,lr:0.001,loss:1.9058,mean_loss:2.349,mean_f1:0.08:  15%|█▌        | 152/1000 [00:39<03:42,  3.81it/s]

preds:[0, 0, 0, 0],label:[1, 5, 0, 1]


epoch:0,batch:152,lr:0.001,loss:2.2056,mean_loss:2.348,mean_f1:0.079:  15%|█▍       | 153/1000 [00:39<03:38,  3.87it/s]

preds:[0, 0, 0, 0],label:[2, 2, 7, 1]


epoch:0,batch:153,lr:0.001,loss:1.8919,mean_loss:2.345,mean_f1:0.08:  15%|█▌        | 154/1000 [00:39<03:40,  3.84it/s]

preds:[0, 0, 0, 0],label:[0, 1, 5, 1]


epoch:0,batch:154,lr:0.001,loss:1.8899,mean_loss:2.342,mean_f1:0.08:  16%|█▌        | 155/1000 [00:40<03:35,  3.92it/s]

preds:[0, 0, 0, 0],label:[3, 1, 0, 1]


epoch:0,batch:155,lr:0.001,loss:1.7344,mean_loss:2.338,mean_f1:0.08:  16%|█▌        | 156/1000 [00:40<03:35,  3.91it/s]

preds:[0, 0, 0, 0],label:[1, 2, 1, 1]


epoch:0,batch:156,lr:0.001,loss:2.8793,mean_loss:2.342,mean_f1:0.079:  16%|█▍       | 157/1000 [00:40<03:35,  3.92it/s]

preds:[1, 1, 1, 1],label:[0, 3, 12, 4]


epoch:0,batch:157,lr:0.001,loss:2.1449,mean_loss:2.34,mean_f1:0.079:  16%|█▌        | 158/1000 [00:40<03:31,  3.99it/s]

preds:[1, 1, 1, 1],label:[4, 2, 1, 5]


epoch:0,batch:158,lr:0.001,loss:1.8868,mean_loss:2.338,mean_f1:0.079:  16%|█▍       | 159/1000 [00:41<03:47,  3.70it/s]

preds:[1, 1, 1, 1],label:[5, 2, 2, 2]


epoch:0,batch:159,lr:0.001,loss:2.4359,mean_loss:2.338,mean_f1:0.079:  16%|█▍       | 160/1000 [00:41<03:44,  3.75it/s]

preds:[1, 1, 1, 1],label:[8, 2, 8, 1]


epoch:0,batch:160,lr:0.001,loss:1.7817,mean_loss:2.335,mean_f1:0.079:  16%|█▍       | 161/1000 [00:41<03:41,  3.78it/s]

preds:[1, 1, 1, 1],label:[2, 0, 0, 2]


epoch:0,batch:161,lr:0.001,loss:2.4093,mean_loss:2.335,mean_f1:0.078:  16%|█▍       | 162/1000 [00:42<03:41,  3.79it/s]

preds:[1, 1, 1, 1],label:[4, 0, 8, 2]


epoch:0,batch:162,lr:0.001,loss:2.3614,mean_loss:2.335,mean_f1:0.078:  16%|█▍       | 163/1000 [00:42<03:38,  3.84it/s]

preds:[1, 1, 1, 1],label:[3, 0, 1, 7]


epoch:0,batch:163,lr:0.001,loss:2.4502,mean_loss:2.336,mean_f1:0.078:  16%|█▍       | 164/1000 [00:42<03:40,  3.80it/s]

preds:[1, 1, 1, 1],label:[0, 4, 7, 1]


epoch:0,batch:164,lr:0.001,loss:2.442,mean_loss:2.337,mean_f1:0.078:  16%|█▋        | 165/1000 [00:42<03:41,  3.77it/s]

preds:[1, 1, 1, 1],label:[0, 4, 8, 0]


epoch:0,batch:165,lr:0.001,loss:2.5148,mean_loss:2.338,mean_f1:0.077:  17%|█▍       | 166/1000 [00:43<03:40,  3.79it/s]

preds:[1, 1, 1, 1],label:[0, 6, 8, 0]


epoch:0,batch:166,lr:0.001,loss:2.2062,mean_loss:2.337,mean_f1:0.077:  17%|█▌       | 167/1000 [00:43<03:39,  3.80it/s]

preds:[1, 1, 1, 1],label:[4, 3, 0, 2]


epoch:0,batch:167,lr:0.001,loss:1.8935,mean_loss:2.334,mean_f1:0.077:  17%|█▌       | 168/1000 [00:43<03:38,  3.81it/s]

preds:[1, 1, 1, 1],label:[2, 4, 2, 1]


epoch:0,batch:168,lr:0.001,loss:2.1595,mean_loss:2.333,mean_f1:0.078:  17%|█▌       | 169/1000 [00:43<03:35,  3.86it/s]

preds:[1, 1, 1, 1],label:[1, 9, 2, 1]


epoch:0,batch:169,lr:0.001,loss:1.985,mean_loss:2.331,mean_f1:0.079:  17%|█▋        | 170/1000 [00:44<03:36,  3.83it/s]

preds:[1, 1, 1, 1],label:[0, 1, 1, 8]


epoch:0,batch:170,lr:0.001,loss:2.8797,mean_loss:2.334,mean_f1:0.078:  17%|█▌       | 171/1000 [00:44<03:35,  3.85it/s]

preds:[1, 1, 1, 1],label:[2, 9, 4, 7]


epoch:0,batch:171,lr:0.001,loss:2.3896,mean_loss:2.335,mean_f1:0.078:  17%|█▌       | 172/1000 [00:44<03:37,  3.81it/s]

preds:[1, 1, 1, 1],label:[11, 2, 0, 0]


epoch:0,batch:172,lr:0.001,loss:2.4292,mean_loss:2.335,mean_f1:0.078:  17%|█▌       | 173/1000 [00:44<03:33,  3.87it/s]

preds:[1, 1, 1, 1],label:[3, 0, 4, 3]


epoch:0,batch:173,lr:0.001,loss:2.4255,mean_loss:2.336,mean_f1:0.077:  17%|█▌       | 174/1000 [00:45<03:30,  3.92it/s]

preds:[1, 1, 1, 1],label:[8, 5, 2, 5]


epoch:0,batch:174,lr:0.001,loss:2.8442,mean_loss:2.339,mean_f1:0.077:  18%|█▌       | 175/1000 [00:45<03:33,  3.86it/s]

preds:[2, 2, 2, 2],label:[4, 9, 6, 2]


epoch:0,batch:175,lr:0.001,loss:2.4732,mean_loss:2.34,mean_f1:0.077:  18%|█▊        | 176/1000 [00:45<03:33,  3.86it/s]

preds:[2, 2, 2, 2],label:[10, 3, 2, 1]


epoch:0,batch:176,lr:0.001,loss:2.6015,mean_loss:2.341,mean_f1:0.078:  18%|█▌       | 177/1000 [00:45<03:32,  3.86it/s]

preds:[2, 2, 2, 2],label:[6, 8, 2, 4]


epoch:0,batch:177,lr:0.001,loss:2.154,mean_loss:2.34,mean_f1:0.077:  18%|█▉         | 178/1000 [00:46<03:29,  3.93it/s]

preds:[2, 2, 2, 2],label:[5, 0, 5, 1]


epoch:0,batch:178,lr:0.001,loss:2.3474,mean_loss:2.34,mean_f1:0.077:  18%|█▊        | 179/1000 [00:46<03:25,  4.00it/s]

preds:[2, 2, 2, 2],label:[1, 0, 11, 2]


epoch:0,batch:179,lr:0.001,loss:3.052,mean_loss:2.344,mean_f1:0.077:  18%|█▊        | 180/1000 [00:46<03:30,  3.90it/s]

preds:[2, 2, 2, 2],label:[5, 0, 6, 12]


epoch:0,batch:180,lr:0.001,loss:2.0048,mean_loss:2.342,mean_f1:0.076:  18%|█▋       | 181/1000 [00:46<03:32,  3.86it/s]

preds:[2, 2, 2, 2],label:[0, 1, 8, 1]


epoch:0,batch:181,lr:0.001,loss:2.5078,mean_loss:2.343,mean_f1:0.076:  18%|█▋       | 182/1000 [00:47<03:32,  3.84it/s]

preds:[2, 2, 2, 2],label:[11, 1, 4, 2]


epoch:0,batch:182,lr:0.001,loss:2.1162,mean_loss:2.342,mean_f1:0.077:  18%|█▋       | 183/1000 [00:47<03:48,  3.57it/s]

preds:[2, 2, 2, 2],label:[3, 1, 4, 2]


epoch:0,batch:183,lr:0.001,loss:1.7063,mean_loss:2.338,mean_f1:0.077:  18%|█▋       | 184/1000 [00:47<03:42,  3.67it/s]

preds:[2, 2, 2, 2],label:[0, 0, 1, 2]


epoch:0,batch:184,lr:0.001,loss:2.4004,mean_loss:2.339,mean_f1:0.077:  18%|█▋       | 185/1000 [00:48<03:39,  3.71it/s]

preds:[2, 2, 2, 2],label:[0, 1, 12, 1]


epoch:0,batch:185,lr:0.001,loss:2.644,mean_loss:2.34,mean_f1:0.076:  19%|██         | 186/1000 [00:48<03:38,  3.73it/s]

preds:[2, 2, 2, 2],label:[3, 12, 1, 1]


epoch:0,batch:186,lr:0.001,loss:2.3396,mean_loss:2.34,mean_f1:0.076:  19%|█▊        | 187/1000 [00:48<03:37,  3.73it/s]

preds:[2, 2, 2, 2],label:[2, 1, 5, 7]


epoch:0,batch:187,lr:0.001,loss:2.1114,mean_loss:2.339,mean_f1:0.076:  19%|█▋       | 188/1000 [00:48<03:37,  3.73it/s]

preds:[2, 2, 2, 2],label:[1, 4, 3, 1]


epoch:0,batch:188,lr:0.001,loss:1.8386,mean_loss:2.336,mean_f1:0.076:  19%|█▋       | 189/1000 [00:49<03:35,  3.77it/s]

preds:[1, 1, 1, 1],label:[2, 2, 4, 1]


epoch:0,batch:189,lr:0.001,loss:2.1924,mean_loss:2.336,mean_f1:0.076:  19%|█▋       | 190/1000 [00:49<03:33,  3.79it/s]

preds:[1, 1, 1, 1],label:[9, 2, 1, 0]


epoch:0,batch:190,lr:0.001,loss:1.7127,mean_loss:2.332,mean_f1:0.077:  19%|█▋       | 191/1000 [00:49<03:35,  3.75it/s]

preds:[1, 1, 1, 1],label:[0, 0, 2, 1]


epoch:0,batch:191,lr:0.001,loss:2.1248,mean_loss:2.331,mean_f1:0.077:  19%|█▋       | 192/1000 [00:49<03:33,  3.79it/s]

preds:[1, 1, 1, 1],label:[2, 1, 0, 7]


epoch:0,batch:192,lr:0.001,loss:2.4189,mean_loss:2.332,mean_f1:0.077:  19%|█▋       | 193/1000 [00:50<03:29,  3.86it/s]

preds:[1, 1, 1, 1],label:[4, 11, 1, 2]


epoch:0,batch:193,lr:0.001,loss:2.6456,mean_loss:2.333,mean_f1:0.076:  19%|█▋       | 194/1000 [00:50<03:27,  3.88it/s]

preds:[1, 1, 1, 1],label:[5, 5, 7, 0]


epoch:0,batch:194,lr:0.001,loss:2.8972,mean_loss:2.336,mean_f1:0.076:  20%|█▊       | 195/1000 [00:50<03:29,  3.84it/s]

preds:[1, 1, 1, 1],label:[3, 3, 0, 12]


epoch:0,batch:195,lr:0.001,loss:2.3793,mean_loss:2.337,mean_f1:0.076:  20%|█▊       | 196/1000 [00:50<03:28,  3.86it/s]

preds:[1, 1, 1, 1],label:[0, 3, 6, 1]


epoch:0,batch:196,lr:0.001,loss:2.9577,mean_loss:2.34,mean_f1:0.076:  20%|█▉        | 197/1000 [00:51<03:27,  3.87it/s]

preds:[1, 1, 1, 1],label:[5, 10, 4, 3]


epoch:0,batch:197,lr:0.001,loss:3.146,mean_loss:2.344,mean_f1:0.075:  20%|█▉        | 198/1000 [00:51<03:26,  3.88it/s]

preds:[1, 1, 1, 1],label:[4, 0, 6, 13]


epoch:0,batch:198,lr:0.001,loss:2.6337,mean_loss:2.345,mean_f1:0.075:  20%|█▊       | 199/1000 [00:51<03:24,  3.92it/s]

preds:[1, 1, 1, 1],label:[3, 7, 0, 5]


epoch:0,batch:199,lr:0.001,loss:2.4462,mean_loss:2.346,mean_f1:0.075:  20%|█▊       | 200/1000 [00:51<03:20,  3.98it/s]

preds:[1, 1, 1, 1],label:[3, 5, 3, 0]


epoch:0,batch:200,lr:0.001,loss:2.3229,mean_loss:2.346,mean_f1:0.074:  20%|█▊       | 201/1000 [00:52<03:20,  3.98it/s]

preds:[1, 1, 1, 1],label:[4, 2, 6, 0]


epoch:0,batch:201,lr:0.001,loss:1.9362,mean_loss:2.344,mean_f1:0.074:  20%|█▊       | 202/1000 [00:52<03:19,  4.00it/s]

preds:[1, 1, 1, 1],label:[2, 1, 5, 0]


epoch:0,batch:202,lr:0.001,loss:2.9209,mean_loss:2.346,mean_f1:0.074:  20%|█▊       | 203/1000 [00:52<03:19,  4.00it/s]

preds:[1, 1, 1, 1],label:[13, 3, 0, 3]


epoch:0,batch:203,lr:0.001,loss:2.805,mean_loss:2.349,mean_f1:0.074:  20%|██        | 204/1000 [00:52<03:20,  3.97it/s]

preds:[1, 1, 1, 1],label:[0, 1, 12, 12]


epoch:0,batch:204,lr:0.001,loss:2.5955,mean_loss:2.35,mean_f1:0.074:  20%|██        | 205/1000 [00:53<03:20,  3.97it/s]

preds:[1, 1, 1, 1],label:[3, 4, 12, 0]


epoch:0,batch:205,lr:0.001,loss:2.6006,mean_loss:2.351,mean_f1:0.074:  21%|█▊       | 206/1000 [00:53<03:21,  3.94it/s]

preds:[0, 0, 0, 0],label:[3, 1, 10, 3]


epoch:0,batch:206,lr:0.001,loss:2.4845,mean_loss:2.352,mean_f1:0.073:  21%|█▊       | 207/1000 [00:53<03:30,  3.77it/s]

preds:[0, 0, 0, 0],label:[7, 1, 1, 6]


epoch:0,batch:207,lr:0.001,loss:2.4986,mean_loss:2.352,mean_f1:0.073:  21%|█▊       | 208/1000 [00:53<03:29,  3.78it/s]

preds:[0, 0, 0, 0],label:[1, 6, 6, 1]


epoch:0,batch:208,lr:0.001,loss:2.7963,mean_loss:2.355,mean_f1:0.073:  21%|█▉       | 209/1000 [00:54<03:24,  3.87it/s]

preds:[0, 0, 0, 0],label:[8, 7, 3, 5]


epoch:0,batch:209,lr:0.001,loss:3.0808,mean_loss:2.358,mean_f1:0.072:  21%|█▉       | 210/1000 [00:54<03:20,  3.94it/s]

preds:[0, 0, 0, 0],label:[9, 7, 1, 9]


epoch:0,batch:210,lr:0.001,loss:2.7675,mean_loss:2.36,mean_f1:0.072:  21%|██        | 211/1000 [00:54<03:17,  3.99it/s]

preds:[0, 0, 0, 0],label:[3, 10, 5, 5]


epoch:0,batch:211,lr:0.001,loss:1.8046,mean_loss:2.357,mean_f1:0.074:  21%|█▉       | 212/1000 [00:54<03:18,  3.96it/s]

preds:[0, 0, 0, 0],label:[0, 0, 0, 2]


epoch:0,batch:212,lr:0.001,loss:2.0651,mean_loss:2.356,mean_f1:0.073:  21%|█▉       | 213/1000 [00:55<03:19,  3.94it/s]

preds:[0, 0, 0, 0],label:[2, 2, 2, 1]


epoch:0,batch:213,lr:0.001,loss:2.1325,mean_loss:2.355,mean_f1:0.073:  21%|█▉       | 214/1000 [00:55<03:19,  3.94it/s]

preds:[0, 0, 0, 0],label:[3, 2, 2, 3]


epoch:0,batch:214,lr:0.001,loss:2.8602,mean_loss:2.357,mean_f1:0.072:  22%|█▉       | 215/1000 [00:55<03:17,  3.98it/s]

preds:[0, 0, 0, 0],label:[10, 9, 3, 2]


epoch:0,batch:215,lr:0.001,loss:1.9433,mean_loss:2.355,mean_f1:0.074:  22%|█▉       | 216/1000 [00:55<03:16,  4.00it/s]

preds:[0, 0, 0, 0],label:[0, 2, 0, 2]


epoch:0,batch:216,lr:0.001,loss:2.1138,mean_loss:2.354,mean_f1:0.074:  22%|█▉       | 217/1000 [00:56<03:16,  3.99it/s]

preds:[0, 0, 0, 0],label:[4, 2, 3, 0]


epoch:0,batch:217,lr:0.001,loss:1.9506,mean_loss:2.352,mean_f1:0.073:  22%|█▉       | 218/1000 [00:56<03:14,  4.01it/s]

preds:[0, 0, 0, 0],label:[1, 2, 3, 3]


epoch:0,batch:218,lr:0.001,loss:2.0934,mean_loss:2.351,mean_f1:0.074:  22%|█▉       | 219/1000 [00:56<03:14,  4.02it/s]

preds:[0, 0, 0, 0],label:[1, 0, 7, 0]


epoch:0,batch:219,lr:0.001,loss:2.1347,mean_loss:2.35,mean_f1:0.074:  22%|██▏       | 220/1000 [00:56<03:16,  3.97it/s]

preds:[0, 0, 0, 0],label:[0, 1, 6, 1]


epoch:0,batch:220,lr:0.001,loss:1.8776,mean_loss:2.348,mean_f1:0.075:  22%|█▉       | 221/1000 [00:57<03:15,  3.98it/s]

preds:[0, 0, 0, 0],label:[0, 1, 2, 3]


epoch:0,batch:221,lr:0.001,loss:2.0196,mean_loss:2.347,mean_f1:0.075:  22%|█▉       | 222/1000 [00:57<03:13,  4.02it/s]

preds:[0, 0, 0, 0],label:[5, 0, 0, 1]


epoch:0,batch:222,lr:0.001,loss:2.4697,mean_loss:2.347,mean_f1:0.075:  22%|██       | 223/1000 [00:57<03:10,  4.08it/s]

preds:[0, 0, 0, 0],label:[0, 8, 2, 4]


epoch:0,batch:223,lr:0.001,loss:1.8468,mean_loss:2.345,mean_f1:0.075:  22%|██       | 224/1000 [00:57<03:13,  4.00it/s]

preds:[0, 0, 0, 0],label:[3, 3, 1, 3]


epoch:0,batch:224,lr:0.001,loss:2.3527,mean_loss:2.345,mean_f1:0.075:  22%|██       | 225/1000 [00:58<03:13,  4.01it/s]

preds:[0, 0, 0, 0],label:[2, 5, 2, 4]


epoch:0,batch:225,lr:0.001,loss:1.8744,mean_loss:2.343,mean_f1:0.074:  23%|██       | 226/1000 [00:58<03:12,  4.01it/s]

preds:[0, 0, 0, 0],label:[1, 2, 1, 1]


epoch:0,batch:226,lr:0.001,loss:2.1418,mean_loss:2.342,mean_f1:0.074:  23%|██       | 227/1000 [00:58<03:12,  4.01it/s]

preds:[0, 0, 0, 0],label:[1, 1, 7, 1]


epoch:0,batch:227,lr:0.001,loss:1.784,mean_loss:2.34,mean_f1:0.075:  23%|██▌        | 228/1000 [00:58<03:16,  3.93it/s]

preds:[0, 0, 0, 0],label:[3, 1, 0, 0]


epoch:0,batch:228,lr:0.001,loss:2.1812,mean_loss:2.339,mean_f1:0.074:  23%|██       | 229/1000 [00:59<03:15,  3.94it/s]

preds:[1, 1, 1, 1],label:[0, 0, 0, 8]


epoch:0,batch:229,lr:0.001,loss:1.7598,mean_loss:2.336,mean_f1:0.075:  23%|██       | 230/1000 [00:59<03:13,  3.99it/s]

preds:[0, 0, 0, 0],label:[1, 3, 1, 0]


epoch:0,batch:230,lr:0.001,loss:1.7855,mean_loss:2.334,mean_f1:0.075:  23%|██       | 231/1000 [00:59<03:13,  3.98it/s]

preds:[1, 1, 1, 1],label:[2, 1, 3, 1]


epoch:0,batch:231,lr:0.001,loss:2.8697,mean_loss:2.336,mean_f1:0.075:  23%|██       | 232/1000 [00:59<03:11,  4.02it/s]

preds:[1, 1, 1, 1],label:[6, 8, 3, 7]


epoch:0,batch:232,lr:0.001,loss:2.5356,mean_loss:2.337,mean_f1:0.075:  23%|██       | 233/1000 [01:00<03:24,  3.75it/s]

preds:[1, 1, 1, 1],label:[8, 6, 2, 0]


epoch:0,batch:233,lr:0.001,loss:1.8156,mean_loss:2.335,mean_f1:0.074:  23%|██       | 234/1000 [01:00<03:24,  3.75it/s]

preds:[1, 1, 1, 1],label:[0, 2, 2, 0]


epoch:0,batch:234,lr:0.001,loss:2.4966,mean_loss:2.336,mean_f1:0.074:  24%|██       | 235/1000 [01:00<03:20,  3.82it/s]

preds:[1, 1, 1, 1],label:[7, 7, 2, 0]


epoch:0,batch:235,lr:0.001,loss:2.3738,mean_loss:2.336,mean_f1:0.075:  24%|██       | 236/1000 [01:01<03:15,  3.90it/s]

preds:[1, 1, 1, 1],label:[6, 1, 1, 7]


epoch:0,batch:236,lr:0.001,loss:2.3583,mean_loss:2.336,mean_f1:0.074:  24%|██▏      | 237/1000 [01:01<03:13,  3.93it/s]

preds:[1, 1, 1, 1],label:[3, 0, 12, 2]


epoch:0,batch:237,lr:0.001,loss:2.054,mean_loss:2.335,mean_f1:0.074:  24%|██▍       | 238/1000 [01:01<03:14,  3.92it/s]

preds:[1, 1, 1, 1],label:[6, 2, 0, 1]


epoch:0,batch:238,lr:0.001,loss:2.1258,mean_loss:2.334,mean_f1:0.074:  24%|██▏      | 239/1000 [01:01<03:15,  3.89it/s]

preds:[1, 1, 1, 1],label:[0, 8, 1, 2]


epoch:0,batch:239,lr:0.001,loss:2.0307,mean_loss:2.332,mean_f1:0.075:  24%|██▏      | 240/1000 [01:02<03:14,  3.90it/s]

preds:[0, 0, 0, 0],label:[5, 0, 1, 3]


epoch:0,batch:240,lr:0.001,loss:2.0363,mean_loss:2.331,mean_f1:0.075:  24%|██▏      | 241/1000 [01:02<03:14,  3.91it/s]

preds:[0, 0, 0, 0],label:[2, 1, 0, 7]


epoch:0,batch:241,lr:0.001,loss:1.6754,mean_loss:2.329,mean_f1:0.076:  24%|██▏      | 242/1000 [01:02<03:12,  3.93it/s]

preds:[0, 0, 0, 0],label:[0, 0, 3, 0]


epoch:0,batch:242,lr:0.001,loss:2.0587,mean_loss:2.327,mean_f1:0.076:  24%|██▏      | 243/1000 [01:02<03:10,  3.98it/s]

preds:[0, 0, 0, 0],label:[2, 1, 0, 4]


epoch:0,batch:243,lr:0.001,loss:1.7527,mean_loss:2.325,mean_f1:0.077:  24%|██▏      | 244/1000 [01:03<03:09,  4.00it/s]

preds:[0, 0, 0, 0],label:[0, 3, 0, 3]


epoch:0,batch:244,lr:0.001,loss:2.5308,mean_loss:2.326,mean_f1:0.077:  24%|██▏      | 245/1000 [01:03<03:10,  3.96it/s]

preds:[0, 0, 0, 0],label:[1, 5, 0, 9]


epoch:0,batch:245,lr:0.001,loss:3.0665,mean_loss:2.329,mean_f1:0.077:  25%|██▏      | 246/1000 [01:03<03:13,  3.90it/s]

preds:[0, 0, 0, 0],label:[12, 2, 4, 5]


epoch:0,batch:246,lr:0.001,loss:2.0463,mean_loss:2.328,mean_f1:0.077:  25%|██▏      | 247/1000 [01:03<03:10,  3.95it/s]

preds:[0, 0, 0, 0],label:[1, 0, 3, 5]


epoch:0,batch:247,lr:0.001,loss:2.0737,mean_loss:2.327,mean_f1:0.077:  25%|██▏      | 248/1000 [01:04<03:10,  3.95it/s]

preds:[0, 0, 0, 0],label:[2, 1, 1, 5]


epoch:0,batch:248,lr:0.001,loss:2.3397,mean_loss:2.327,mean_f1:0.077:  25%|██▏      | 249/1000 [01:04<03:09,  3.97it/s]

preds:[0, 0, 0, 0],label:[1, 0, 13, 0]


epoch:0,batch:249,lr:0.001,loss:2.0501,mean_loss:2.326,mean_f1:0.078:  25%|██▎      | 250/1000 [01:04<03:09,  3.95it/s]

preds:[0, 0, 0, 0],label:[2, 1, 4, 0]


epoch:0,batch:250,lr:0.001,loss:1.8006,mean_loss:2.324,mean_f1:0.078:  25%|██▎      | 251/1000 [01:04<03:09,  3.96it/s]

preds:[0, 0, 0, 0],label:[0, 3, 1, 3]


epoch:0,batch:251,lr:0.001,loss:1.4891,mean_loss:2.32,mean_f1:0.079:  25%|██▌       | 252/1000 [01:05<03:10,  3.93it/s]

preds:[0, 0, 0, 0],label:[1, 0, 0, 1]


epoch:0,batch:252,lr:0.001,loss:2.5044,mean_loss:2.321,mean_f1:0.079:  25%|██▎      | 253/1000 [01:05<03:07,  3.98it/s]

preds:[0, 0, 0, 0],label:[3, 8, 0, 6]


epoch:0,batch:253,lr:0.001,loss:1.9642,mean_loss:2.32,mean_f1:0.079:  25%|██▌       | 254/1000 [01:05<03:07,  3.98it/s]

preds:[0, 0, 0, 0],label:[1, 1, 5, 1]


epoch:0,batch:254,lr:0.001,loss:3.2605,mean_loss:2.323,mean_f1:0.078:  26%|██▎      | 255/1000 [01:05<03:10,  3.92it/s]

preds:[0, 0, 0, 0],label:[4, 11, 2, 5]


epoch:0,batch:255,lr:0.001,loss:1.9609,mean_loss:2.322,mean_f1:0.078:  26%|██▎      | 256/1000 [01:06<03:10,  3.90it/s]

preds:[0, 0, 0, 0],label:[0, 5, 2, 1]


epoch:0,batch:256,lr:0.001,loss:2.4107,mean_loss:2.322,mean_f1:0.078:  26%|██▎      | 257/1000 [01:06<03:19,  3.72it/s]

preds:[0, 0, 0, 0],label:[4, 1, 4, 1]


epoch:0,batch:257,lr:0.001,loss:2.4308,mean_loss:2.323,mean_f1:0.078:  26%|██▎      | 258/1000 [01:06<03:17,  3.75it/s]

preds:[0, 0, 0, 0],label:[10, 2, 0, 2]


epoch:0,batch:258,lr:0.001,loss:2.2861,mean_loss:2.323,mean_f1:0.079:  26%|██▎      | 259/1000 [01:06<03:12,  3.84it/s]

preds:[0, 0, 0, 0],label:[10, 3, 0, 0]


epoch:0,batch:259,lr:0.001,loss:3.4575,mean_loss:2.327,mean_f1:0.078:  26%|██▎      | 260/1000 [01:07<03:11,  3.87it/s]

preds:[0, 0, 0, 0],label:[9, 3, 11, 5]


epoch:0,batch:260,lr:0.001,loss:2.2598,mean_loss:2.327,mean_f1:0.078:  26%|██▎      | 261/1000 [01:07<03:08,  3.91it/s]

preds:[0, 0, 0, 0],label:[2, 2, 1, 8]


epoch:0,batch:261,lr:0.001,loss:2.2404,mean_loss:2.326,mean_f1:0.078:  26%|██▎      | 262/1000 [01:07<03:08,  3.91it/s]

preds:[0, 0, 0, 0],label:[7, 1, 0, 5]


epoch:0,batch:262,lr:0.001,loss:2.2242,mean_loss:2.326,mean_f1:0.078:  26%|██▎      | 263/1000 [01:07<03:08,  3.91it/s]

preds:[0, 0, 0, 0],label:[3, 2, 4, 1]


epoch:0,batch:263,lr:0.001,loss:3.3514,mean_loss:2.33,mean_f1:0.078:  26%|██▋       | 264/1000 [01:08<03:07,  3.93it/s]

preds:[0, 0, 0, 0],label:[3, 4, 8, 11]


epoch:0,batch:264,lr:0.001,loss:1.8708,mean_loss:2.328,mean_f1:0.078:  26%|██▍      | 265/1000 [01:08<03:06,  3.93it/s]

preds:[0, 0, 0, 0],label:[2, 2, 0, 2]


epoch:0,batch:265,lr:0.001,loss:2.3671,mean_loss:2.328,mean_f1:0.078:  27%|██▍      | 266/1000 [01:08<03:06,  3.94it/s]

preds:[0, 0, 0, 0],label:[2, 6, 1, 5]


epoch:0,batch:266,lr:0.001,loss:1.9002,mean_loss:2.327,mean_f1:0.078:  27%|██▍      | 267/1000 [01:08<03:05,  3.96it/s]

preds:[0, 0, 0, 0],label:[2, 2, 1, 3]


epoch:0,batch:267,lr:0.001,loss:2.8913,mean_loss:2.329,mean_f1:0.077:  27%|██▍      | 268/1000 [01:09<03:07,  3.91it/s]

preds:[1, 1, 1, 1],label:[8, 2, 6, 4]


epoch:0,batch:268,lr:0.001,loss:1.6831,mean_loss:2.326,mean_f1:0.078:  27%|██▍      | 269/1000 [01:09<03:08,  3.89it/s]

preds:[1, 1, 1, 1],label:[1, 3, 0, 1]


epoch:0,batch:269,lr:0.001,loss:2.1448,mean_loss:2.326,mean_f1:0.077:  27%|██▍      | 270/1000 [01:09<03:08,  3.88it/s]

preds:[1, 1, 1, 1],label:[0, 2, 2, 8]


epoch:0,batch:270,lr:0.001,loss:2.3323,mean_loss:2.326,mean_f1:0.078:  27%|██▍      | 271/1000 [01:09<03:04,  3.94it/s]

preds:[1, 1, 1, 1],label:[0, 6, 1, 4]


epoch:0,batch:271,lr:0.001,loss:2.0387,mean_loss:2.325,mean_f1:0.078:  27%|██▍      | 272/1000 [01:10<03:04,  3.95it/s]

preds:[1, 1, 1, 1],label:[1, 7, 2, 1]


epoch:0,batch:272,lr:0.001,loss:1.6551,mean_loss:2.322,mean_f1:0.079:  27%|██▍      | 273/1000 [01:10<03:06,  3.90it/s]

preds:[1, 1, 1, 1],label:[0, 0, 0, 1]


epoch:0,batch:273,lr:0.001,loss:2.1309,mean_loss:2.321,mean_f1:0.078:  27%|██▍      | 274/1000 [01:10<03:04,  3.94it/s]

preds:[1, 1, 1, 1],label:[2, 0, 7, 2]


epoch:0,batch:274,lr:0.001,loss:2.1178,mean_loss:2.321,mean_f1:0.078:  28%|██▍      | 275/1000 [01:10<03:04,  3.94it/s]

preds:[1, 1, 1, 1],label:[4, 3, 2, 2]


epoch:0,batch:275,lr:0.001,loss:2.3283,mean_loss:2.321,mean_f1:0.078:  28%|██▍      | 276/1000 [01:11<03:04,  3.93it/s]

preds:[1, 1, 1, 1],label:[4, 1, 8, 2]


epoch:0,batch:276,lr:0.001,loss:2.0665,mean_loss:2.32,mean_f1:0.078:  28%|██▊       | 277/1000 [01:11<03:04,  3.91it/s]

preds:[1, 1, 1, 1],label:[3, 2, 3, 3]


epoch:0,batch:277,lr:0.001,loss:2.1379,mean_loss:2.319,mean_f1:0.078:  28%|██▌      | 278/1000 [01:11<03:05,  3.90it/s]

preds:[1, 1, 1, 1],label:[2, 1, 8, 3]


epoch:0,batch:278,lr:0.001,loss:1.9384,mean_loss:2.318,mean_f1:0.078:  28%|██▌      | 279/1000 [01:12<03:04,  3.91it/s]

preds:[2, 2, 2, 2],label:[3, 3, 0, 1]


epoch:0,batch:279,lr:0.001,loss:2.4699,mean_loss:2.318,mean_f1:0.077:  28%|██▌      | 280/1000 [01:12<03:02,  3.95it/s]

preds:[2, 2, 2, 2],label:[3, 5, 7, 1]


epoch:0,batch:280,lr:0.001,loss:1.7244,mean_loss:2.316,mean_f1:0.077:  28%|██▌      | 281/1000 [01:12<03:03,  3.93it/s]

preds:[2, 2, 2, 2],label:[1, 3, 1, 1]


epoch:0,batch:281,lr:0.001,loss:2.9767,mean_loss:2.319,mean_f1:0.077:  28%|██▌      | 282/1000 [01:12<03:12,  3.74it/s]

preds:[2, 2, 2, 2],label:[6, 10, 5, 1]


epoch:0,batch:282,lr:0.001,loss:1.6322,mean_loss:2.316,mean_f1:0.077:  28%|██▌      | 283/1000 [01:13<03:10,  3.76it/s]

preds:[2, 2, 2, 2],label:[2, 1, 0, 2]


epoch:0,batch:283,lr:0.001,loss:2.0699,mean_loss:2.315,mean_f1:0.077:  28%|██▌      | 284/1000 [01:13<03:07,  3.82it/s]

preds:[2, 2, 2, 2],label:[1, 6, 2, 3]


epoch:0,batch:284,lr:0.001,loss:2.3868,mean_loss:2.316,mean_f1:0.077:  28%|██▌      | 285/1000 [01:13<03:06,  3.84it/s]

preds:[2, 2, 2, 2],label:[1, 4, 6, 0]


epoch:0,batch:285,lr:0.001,loss:2.0528,mean_loss:2.315,mean_f1:0.078:  29%|██▌      | 286/1000 [01:13<03:02,  3.91it/s]

preds:[2, 2, 2, 2],label:[8, 0, 2, 2]


epoch:0,batch:286,lr:0.001,loss:2.2017,mean_loss:2.314,mean_f1:0.077:  29%|██▌      | 287/1000 [01:14<03:02,  3.91it/s]

preds:[2, 2, 2, 2],label:[0, 1, 0, 8]


epoch:0,batch:287,lr:0.001,loss:2.2514,mean_loss:2.314,mean_f1:0.077:  29%|██▌      | 288/1000 [01:14<03:01,  3.91it/s]

preds:[2, 2, 2, 2],label:[1, 4, 8, 2]


epoch:0,batch:288,lr:0.001,loss:2.2006,mean_loss:2.314,mean_f1:0.077:  29%|██▌      | 289/1000 [01:14<02:59,  3.95it/s]

preds:[2, 2, 2, 2],label:[4, 3, 3, 3]


epoch:0,batch:289,lr:0.001,loss:1.6253,mean_loss:2.311,mean_f1:0.078:  29%|██▌      | 290/1000 [01:14<03:02,  3.90it/s]

preds:[2, 2, 2, 2],label:[2, 2, 3, 2]


epoch:0,batch:290,lr:0.001,loss:1.5289,mean_loss:2.309,mean_f1:0.079:  29%|██▌      | 291/1000 [01:15<03:01,  3.92it/s]

preds:[2, 2, 2, 2],label:[1, 2, 1, 1]


epoch:0,batch:291,lr:0.001,loss:3.3259,mean_loss:2.312,mean_f1:0.078:  29%|██▋      | 292/1000 [01:15<02:59,  3.95it/s]

preds:[2, 2, 2, 2],label:[9, 4, 6, 6]


epoch:0,batch:292,lr:0.001,loss:2.2056,mean_loss:2.312,mean_f1:0.078:  29%|██▋      | 293/1000 [01:15<02:58,  3.96it/s]

preds:[2, 2, 2, 2],label:[3, 1, 3, 7]


epoch:0,batch:293,lr:0.001,loss:2.4749,mean_loss:2.312,mean_f1:0.078:  29%|██▋      | 294/1000 [01:15<02:56,  4.00it/s]

preds:[2, 2, 2, 2],label:[3, 3, 1, 12]


epoch:0,batch:294,lr:0.001,loss:2.5978,mean_loss:2.313,mean_f1:0.078:  30%|██▋      | 295/1000 [01:16<02:57,  3.96it/s]

preds:[2, 2, 2, 2],label:[3, 5, 4, 4]


epoch:0,batch:295,lr:0.001,loss:1.5205,mean_loss:2.311,mean_f1:0.078:  30%|██▋      | 296/1000 [01:16<02:57,  3.98it/s]

preds:[2, 2, 2, 2],label:[1, 2, 1, 1]


epoch:0,batch:296,lr:0.001,loss:1.9966,mean_loss:2.309,mean_f1:0.078:  30%|██▋      | 297/1000 [01:16<02:56,  3.99it/s]

preds:[1, 1, 1, 1],label:[1, 0, 2, 4]


epoch:0,batch:297,lr:0.001,loss:2.6839,mean_loss:2.311,mean_f1:0.078:  30%|██▋      | 298/1000 [01:16<02:56,  3.98it/s]

preds:[1, 1, 1, 1],label:[10, 0, 0, 3]


epoch:0,batch:298,lr:0.001,loss:2.2831,mean_loss:2.311,mean_f1:0.078:  30%|██▋      | 299/1000 [01:17<02:57,  3.96it/s]

preds:[1, 1, 1, 1],label:[1, 0, 5, 0]


epoch:0,batch:299,lr:0.001,loss:2.1559,mean_loss:2.31,mean_f1:0.078:  30%|███       | 300/1000 [01:17<02:57,  3.95it/s]

preds:[1, 1, 1, 1],label:[0, 0, 4, 1]


epoch:0,batch:300,lr:0.001,loss:2.0346,mean_loss:2.309,mean_f1:0.078:  30%|██▋      | 301/1000 [01:17<02:58,  3.91it/s]

preds:[1, 1, 1, 1],label:[3, 3, 1, 5]


epoch:0,batch:301,lr:0.001,loss:2.5205,mean_loss:2.31,mean_f1:0.078:  30%|███       | 302/1000 [01:17<02:58,  3.91it/s]

preds:[1, 1, 1, 1],label:[0, 0, 0, 7]


epoch:0,batch:302,lr:0.001,loss:2.3249,mean_loss:2.31,mean_f1:0.078:  30%|███       | 303/1000 [01:18<02:55,  3.97it/s]

preds:[1, 1, 1, 1],label:[3, 9, 1, 2]


epoch:0,batch:303,lr:0.001,loss:2.1871,mean_loss:2.31,mean_f1:0.078:  30%|███       | 304/1000 [01:18<02:54,  3.99it/s]

preds:[1, 1, 1, 1],label:[5, 0, 1, 0]


epoch:0,batch:304,lr:0.001,loss:1.8417,mean_loss:2.308,mean_f1:0.078:  30%|██▋      | 305/1000 [01:18<02:53,  4.01it/s]

preds:[1, 1, 1, 1],label:[2, 2, 0, 2]


epoch:0,batch:305,lr:0.001,loss:1.9003,mean_loss:2.307,mean_f1:0.078:  31%|██▊      | 306/1000 [01:18<02:53,  4.00it/s]

preds:[1, 1, 1, 1],label:[4, 1, 2, 3]


epoch:0,batch:306,lr:0.001,loss:3.489,mean_loss:2.311,mean_f1:0.078:  31%|███       | 307/1000 [01:19<03:00,  3.83it/s]

preds:[1, 1, 1, 1],label:[6, 11, 0, 10]


epoch:0,batch:307,lr:0.001,loss:1.9885,mean_loss:2.309,mean_f1:0.078:  31%|██▊      | 308/1000 [01:19<03:00,  3.83it/s]

preds:[1, 1, 1, 1],label:[2, 3, 2, 4]


epoch:0,batch:308,lr:0.001,loss:2.2727,mean_loss:2.309,mean_f1:0.078:  31%|██▊      | 309/1000 [01:19<02:59,  3.85it/s]

preds:[1, 1, 1, 1],label:[1, 1, 0, 10]


epoch:0,batch:309,lr:0.001,loss:1.9255,mean_loss:2.308,mean_f1:0.078:  31%|██▊      | 310/1000 [01:19<02:58,  3.87it/s]

preds:[1, 1, 1, 1],label:[3, 4, 3, 1]


epoch:0,batch:310,lr:0.001,loss:2.0279,mean_loss:2.307,mean_f1:0.079:  31%|██▊      | 311/1000 [01:20<02:58,  3.86it/s]

preds:[1, 1, 1, 1],label:[3, 1, 1, 8]


epoch:0,batch:311,lr:0.001,loss:2.5317,mean_loss:2.308,mean_f1:0.079:  31%|██▊      | 312/1000 [01:20<02:56,  3.89it/s]

preds:[1, 1, 1, 1],label:[4, 9, 1, 3]


epoch:0,batch:312,lr:0.001,loss:2.8002,mean_loss:2.31,mean_f1:0.079:  31%|███▏      | 313/1000 [01:20<02:56,  3.89it/s]

preds:[1, 1, 1, 1],label:[1, 3, 8, 11]


epoch:0,batch:313,lr:0.001,loss:1.7353,mean_loss:2.308,mean_f1:0.079:  31%|██▊      | 314/1000 [01:20<02:54,  3.93it/s]

preds:[1, 1, 1, 1],label:[3, 3, 1, 3]


epoch:0,batch:314,lr:0.001,loss:1.7288,mean_loss:2.306,mean_f1:0.08:  32%|███▏      | 315/1000 [01:21<02:53,  3.96it/s]

preds:[1, 1, 1, 1],label:[0, 1, 3, 3]


epoch:0,batch:315,lr:0.001,loss:2.1708,mean_loss:2.305,mean_f1:0.08:  32%|███▏      | 316/1000 [01:21<02:49,  4.03it/s]

preds:[1, 1, 1, 1],label:[0, 1, 7, 0]


epoch:0,batch:316,lr:0.001,loss:2.629,mean_loss:2.306,mean_f1:0.08:  32%|███▍       | 317/1000 [01:21<02:49,  4.03it/s]

preds:[1, 1, 1, 1],label:[5, 2, 1, 10]


epoch:0,batch:317,lr:0.001,loss:1.9926,mean_loss:2.305,mean_f1:0.081:  32%|██▊      | 318/1000 [01:21<02:48,  4.04it/s]

preds:[1, 1, 1, 1],label:[4, 1, 4, 1]


epoch:0,batch:318,lr:0.001,loss:2.2054,mean_loss:2.305,mean_f1:0.081:  32%|██▊      | 319/1000 [01:22<02:51,  3.97it/s]

preds:[1, 1, 1, 1],label:[2, 7, 0, 1]


epoch:0,batch:319,lr:0.001,loss:2.3598,mean_loss:2.305,mean_f1:0.081:  32%|██▉      | 320/1000 [01:22<02:54,  3.91it/s]

preds:[1, 1, 1, 1],label:[2, 1, 10, 2]


epoch:0,batch:320,lr:0.001,loss:1.754,mean_loss:2.304,mean_f1:0.081:  32%|███▏      | 321/1000 [01:22<02:52,  3.94it/s]

preds:[1, 1, 1, 1],label:[0, 1, 0, 0]


epoch:0,batch:321,lr:0.001,loss:2.7016,mean_loss:2.305,mean_f1:0.081:  32%|██▉      | 322/1000 [01:22<02:51,  3.95it/s]

preds:[1, 1, 1, 1],label:[1, 0, 9, 6]


epoch:0,batch:322,lr:0.001,loss:2.1981,mean_loss:2.305,mean_f1:0.081:  32%|██▉      | 323/1000 [01:23<02:48,  4.01it/s]

preds:[1, 1, 1, 1],label:[10, 0, 1, 3]


epoch:0,batch:323,lr:0.001,loss:1.4334,mean_loss:2.302,mean_f1:0.084:  32%|██▉      | 324/1000 [01:23<02:51,  3.94it/s]

preds:[1, 1, 1, 1],label:[1, 1, 1, 1]


epoch:0,batch:324,lr:0.001,loss:2.0674,mean_loss:2.301,mean_f1:0.085:  32%|██▉      | 325/1000 [01:23<02:51,  3.94it/s]

preds:[1, 1, 1, 1],label:[1, 1, 2, 6]


epoch:0,batch:325,lr:0.001,loss:1.6867,mean_loss:2.299,mean_f1:0.085:  33%|██▉      | 326/1000 [01:23<02:51,  3.93it/s]

preds:[1, 1, 1, 1],label:[3, 0, 0, 1]


epoch:0,batch:326,lr:0.001,loss:1.9052,mean_loss:2.298,mean_f1:0.085:  33%|██▉      | 327/1000 [01:24<02:51,  3.92it/s]

preds:[1, 1, 1, 1],label:[5, 1, 0, 1]


epoch:0,batch:327,lr:0.001,loss:2.8359,mean_loss:2.3,mean_f1:0.085:  33%|███▌       | 328/1000 [01:24<02:51,  3.91it/s]

preds:[1, 1, 1, 1],label:[11, 0, 3, 5]


epoch:0,batch:328,lr:0.001,loss:1.8709,mean_loss:2.298,mean_f1:0.085:  33%|██▉      | 329/1000 [01:24<02:50,  3.95it/s]

preds:[1, 1, 1, 1],label:[0, 4, 3, 1]


epoch:0,batch:329,lr:0.001,loss:2.2929,mean_loss:2.298,mean_f1:0.085:  33%|██▉      | 330/1000 [01:24<02:48,  3.96it/s]

preds:[1, 1, 1, 1],label:[1, 4, 8, 3]


epoch:0,batch:330,lr:0.001,loss:2.7224,mean_loss:2.3,mean_f1:0.085:  33%|███▋       | 331/1000 [01:25<02:48,  3.96it/s]

preds:[1, 1, 1, 1],label:[12, 0, 4, 3]


epoch:0,batch:331,lr:0.001,loss:2.5402,mean_loss:2.3,mean_f1:0.084:  33%|███▋       | 332/1000 [01:25<02:56,  3.79it/s]

preds:[1, 1, 1, 1],label:[2, 7, 0, 4]


epoch:0,batch:332,lr:0.001,loss:2.077,mean_loss:2.3,mean_f1:0.085:  33%|███▉        | 333/1000 [01:25<02:58,  3.74it/s]

preds:[1, 1, 1, 1],label:[0, 1, 8, 3]


epoch:0,batch:333,lr:0.001,loss:2.5555,mean_loss:2.3,mean_f1:0.084:  33%|███▋       | 334/1000 [01:26<02:55,  3.79it/s]

preds:[1, 1, 1, 1],label:[0, 6, 0, 6]


epoch:0,batch:334,lr:0.001,loss:2.071,mean_loss:2.3,mean_f1:0.085:  34%|████        | 335/1000 [01:26<02:52,  3.85it/s]

preds:[1, 1, 1, 1],label:[1, 2, 2, 2]


epoch:0,batch:335,lr:0.001,loss:2.3466,mean_loss:2.3,mean_f1:0.084:  34%|███▋       | 336/1000 [01:26<02:52,  3.86it/s]

preds:[1, 1, 1, 1],label:[4, 3, 4, 2]


epoch:0,batch:336,lr:0.001,loss:2.5864,mean_loss:2.301,mean_f1:0.084:  34%|███      | 337/1000 [01:26<02:49,  3.90it/s]

preds:[1, 1, 1, 1],label:[6, 3, 0, 6]


epoch:0,batch:337,lr:0.001,loss:2.1183,mean_loss:2.3,mean_f1:0.085:  34%|███▋       | 338/1000 [01:27<02:51,  3.86it/s]

preds:[1, 1, 1, 1],label:[2, 7, 1, 1]


epoch:0,batch:338,lr:0.001,loss:2.2021,mean_loss:2.3,mean_f1:0.084:  34%|███▋       | 339/1000 [01:27<02:48,  3.93it/s]

preds:[1, 1, 1, 1],label:[3, 2, 2, 2]


epoch:0,batch:339,lr:0.001,loss:1.7872,mean_loss:2.298,mean_f1:0.084:  34%|███      | 340/1000 [01:27<02:48,  3.92it/s]

preds:[1, 1, 1, 1],label:[1, 0, 2, 3]


epoch:0,batch:340,lr:0.001,loss:2.8389,mean_loss:2.3,mean_f1:0.084:  34%|███▊       | 341/1000 [01:27<02:47,  3.94it/s]

preds:[1, 1, 1, 1],label:[5, 12, 1, 2]


epoch:0,batch:341,lr:0.001,loss:2.7872,mean_loss:2.301,mean_f1:0.084:  34%|███      | 342/1000 [01:28<02:47,  3.92it/s]

preds:[1, 1, 1, 1],label:[4, 2, 6, 5]


epoch:0,batch:342,lr:0.001,loss:2.6683,mean_loss:2.302,mean_f1:0.084:  34%|███      | 343/1000 [01:28<02:48,  3.91it/s]

preds:[1, 1, 1, 1],label:[3, 5, 8, 2]


epoch:0,batch:343,lr:0.001,loss:2.0229,mean_loss:2.302,mean_f1:0.084:  34%|███      | 344/1000 [01:28<02:49,  3.86it/s]

preds:[1, 1, 1, 1],label:[2, 2, 4, 1]


epoch:0,batch:344,lr:0.001,loss:2.4201,mean_loss:2.302,mean_f1:0.084:  34%|███      | 345/1000 [01:28<02:47,  3.90it/s]

preds:[1, 1, 1, 1],label:[5, 5, 3, 1]


epoch:0,batch:345,lr:0.001,loss:1.7061,mean_loss:2.3,mean_f1:0.085:  35%|███▊       | 346/1000 [01:29<02:47,  3.91it/s]

preds:[1, 1, 1, 1],label:[1, 1, 1, 4]


epoch:0,batch:346,lr:0.001,loss:1.8106,mean_loss:2.299,mean_f1:0.085:  35%|███      | 347/1000 [01:29<02:46,  3.92it/s]

preds:[1, 1, 1, 1],label:[3, 2, 1, 3]


epoch:0,batch:347,lr:0.001,loss:1.9256,mean_loss:2.298,mean_f1:0.085:  35%|███▏     | 348/1000 [01:29<02:45,  3.93it/s]

preds:[1, 1, 1, 1],label:[4, 3, 1, 3]


epoch:0,batch:348,lr:0.001,loss:1.8884,mean_loss:2.297,mean_f1:0.085:  35%|███▏     | 349/1000 [01:29<02:45,  3.92it/s]

preds:[1, 1, 1, 1],label:[2, 0, 3, 2]


epoch:0,batch:349,lr:0.001,loss:2.557,mean_loss:2.297,mean_f1:0.085:  35%|███▌      | 350/1000 [01:30<02:46,  3.91it/s]

preds:[1, 1, 1, 1],label:[1, 3, 6, 10]


epoch:0,batch:350,lr:0.001,loss:1.8941,mean_loss:2.296,mean_f1:0.085:  35%|███▏     | 351/1000 [01:30<02:42,  3.99it/s]

preds:[1, 1, 1, 1],label:[0, 2, 0, 0]


epoch:0,batch:351,lr:0.001,loss:1.8799,mean_loss:2.295,mean_f1:0.085:  35%|███▏     | 352/1000 [01:30<02:43,  3.95it/s]

preds:[1, 1, 1, 1],label:[0, 0, 2, 2]


epoch:0,batch:352,lr:0.001,loss:1.6546,mean_loss:2.293,mean_f1:0.085:  35%|███▏     | 353/1000 [01:30<02:44,  3.94it/s]

preds:[1, 1, 1, 1],label:[2, 2, 1, 1]


epoch:0,batch:353,lr:0.001,loss:2.5949,mean_loss:2.294,mean_f1:0.085:  35%|███▏     | 354/1000 [01:31<02:43,  3.95it/s]

preds:[1, 1, 1, 1],label:[4, 11, 0, 1]


epoch:0,batch:354,lr:0.001,loss:2.0853,mean_loss:2.293,mean_f1:0.086:  36%|███▏     | 355/1000 [01:31<02:43,  3.95it/s]

preds:[1, 1, 1, 1],label:[1, 3, 0, 6]


epoch:0,batch:355,lr:0.001,loss:2.5228,mean_loss:2.294,mean_f1:0.085:  36%|███▏     | 356/1000 [01:31<02:41,  3.98it/s]

preds:[1, 1, 1, 1],label:[3, 2, 8, 5]


epoch:0,batch:356,lr:0.001,loss:2.9954,mean_loss:2.296,mean_f1:0.085:  36%|███▏     | 357/1000 [01:31<02:50,  3.77it/s]

preds:[1, 1, 1, 1],label:[6, 1, 9, 5]


epoch:0,batch:357,lr:0.001,loss:2.7901,mean_loss:2.297,mean_f1:0.085:  36%|███▏     | 358/1000 [01:32<02:49,  3.79it/s]

preds:[1, 1, 1, 1],label:[7, 7, 0, 0]


epoch:0,batch:358,lr:0.001,loss:2.1006,mean_loss:2.297,mean_f1:0.085:  36%|███▏     | 359/1000 [01:32<02:44,  3.89it/s]

preds:[1, 1, 1, 1],label:[2, 4, 4, 2]


epoch:0,batch:359,lr:0.001,loss:2.4813,mean_loss:2.297,mean_f1:0.085:  36%|███▏     | 360/1000 [01:32<02:43,  3.91it/s]

preds:[1, 1, 1, 1],label:[6, 3, 1, 8]


epoch:0,batch:360,lr:0.001,loss:2.4398,mean_loss:2.298,mean_f1:0.085:  36%|███▏     | 361/1000 [01:32<02:43,  3.91it/s]

preds:[1, 1, 1, 1],label:[0, 2, 0, 9]


epoch:0,batch:361,lr:0.001,loss:1.9677,mean_loss:2.297,mean_f1:0.085:  36%|███▎     | 362/1000 [01:33<02:42,  3.93it/s]

preds:[1, 1, 1, 1],label:[4, 0, 1, 3]


epoch:0,batch:362,lr:0.001,loss:1.7518,mean_loss:2.295,mean_f1:0.085:  36%|███▎     | 363/1000 [01:33<02:41,  3.93it/s]

preds:[1, 1, 1, 1],label:[3, 1, 1, 3]


epoch:0,batch:363,lr:0.001,loss:1.8129,mean_loss:2.294,mean_f1:0.085:  36%|███▎     | 364/1000 [01:33<02:39,  3.98it/s]

preds:[1, 1, 1, 1],label:[0, 0, 0, 2]


epoch:0,batch:364,lr:0.001,loss:2.1241,mean_loss:2.294,mean_f1:0.085:  36%|███▎     | 365/1000 [01:33<02:41,  3.94it/s]

preds:[1, 1, 1, 1],label:[2, 4, 4, 0]


epoch:0,batch:365,lr:0.001,loss:2.3335,mean_loss:2.294,mean_f1:0.085:  37%|███▎     | 366/1000 [01:34<02:41,  3.94it/s]

preds:[2, 2, 2, 2],label:[0, 2, 9, 2]


epoch:0,batch:366,lr:0.001,loss:2.1618,mean_loss:2.293,mean_f1:0.086:  37%|███▎     | 367/1000 [01:34<02:40,  3.95it/s]

preds:[2, 2, 2, 2],label:[7, 2, 2, 0]


epoch:0,batch:367,lr:0.001,loss:2.2774,mean_loss:2.293,mean_f1:0.085:  37%|███▎     | 368/1000 [01:34<02:38,  3.98it/s]

preds:[2, 2, 2, 2],label:[3, 3, 1, 7]


epoch:0,batch:368,lr:0.001,loss:1.722,mean_loss:2.292,mean_f1:0.086:  37%|███▋      | 369/1000 [01:34<02:39,  3.96it/s]

preds:[2, 2, 2, 2],label:[2, 0, 3, 2]


epoch:0,batch:369,lr:0.001,loss:2.3552,mean_loss:2.292,mean_f1:0.086:  37%|███▎     | 370/1000 [01:35<02:39,  3.96it/s]

preds:[2, 2, 2, 2],label:[8, 4, 1, 0]


epoch:0,batch:370,lr:0.001,loss:2.1854,mean_loss:2.292,mean_f1:0.085:  37%|███▎     | 371/1000 [01:35<02:37,  3.99it/s]

preds:[2, 2, 2, 2],label:[1, 8, 0, 1]


epoch:0,batch:371,lr:0.001,loss:2.5964,mean_loss:2.292,mean_f1:0.085:  37%|███▎     | 372/1000 [01:35<02:36,  4.01it/s]

preds:[2, 2, 2, 2],label:[11, 1, 4, 0]


epoch:0,batch:372,lr:0.001,loss:2.1278,mean_loss:2.292,mean_f1:0.085:  37%|███▎     | 373/1000 [01:35<02:37,  3.98it/s]

preds:[2, 2, 2, 2],label:[1, 0, 3, 6]


epoch:0,batch:373,lr:0.001,loss:1.8089,mean_loss:2.291,mean_f1:0.085:  37%|███▎     | 374/1000 [01:36<02:36,  4.01it/s]

preds:[2, 2, 2, 2],label:[1, 0, 3, 1]


epoch:0,batch:374,lr:0.001,loss:2.6673,mean_loss:2.292,mean_f1:0.084:  38%|███▍     | 375/1000 [01:36<02:37,  3.97it/s]

preds:[2, 2, 2, 2],label:[4, 7, 5, 0]


epoch:0,batch:375,lr:0.001,loss:2.0383,mean_loss:2.291,mean_f1:0.084:  38%|███▍     | 376/1000 [01:36<02:36,  3.99it/s]

preds:[0, 0, 0, 0],label:[1, 0, 2, 6]


epoch:0,batch:376,lr:0.001,loss:2.635,mean_loss:2.292,mean_f1:0.084:  38%|███▊      | 377/1000 [01:36<02:37,  3.96it/s]

preds:[0, 0, 0, 0],label:[4, 1, 5, 5]


epoch:0,batch:377,lr:0.001,loss:1.6535,mean_loss:2.29,mean_f1:0.085:  38%|███▊      | 378/1000 [01:37<02:38,  3.93it/s]

preds:[0, 0, 0, 0],label:[1, 2, 0, 0]


epoch:0,batch:378,lr:0.001,loss:2.123,mean_loss:2.29,mean_f1:0.085:  38%|████▏      | 379/1000 [01:37<02:37,  3.94it/s]

preds:[0, 0, 0, 0],label:[1, 1, 0, 8]


epoch:0,batch:379,lr:0.001,loss:2.5622,mean_loss:2.291,mean_f1:0.085:  38%|███▍     | 380/1000 [01:37<02:37,  3.94it/s]

preds:[0, 0, 0, 0],label:[3, 3, 9, 3]


epoch:0,batch:380,lr:0.001,loss:2.8807,mean_loss:2.292,mean_f1:0.084:  38%|███▍     | 381/1000 [01:37<02:34,  3.99it/s]

preds:[0, 0, 0, 0],label:[8, 11, 2, 3]


epoch:0,batch:381,lr:0.001,loss:3.0505,mean_loss:2.294,mean_f1:0.084:  38%|███▍     | 382/1000 [01:38<02:46,  3.70it/s]

preds:[0, 0, 0, 0],label:[1, 9, 12, 2]


epoch:0,batch:382,lr:0.001,loss:1.6972,mean_loss:2.293,mean_f1:0.084:  38%|███▍     | 383/1000 [01:38<02:46,  3.71it/s]

preds:[0, 0, 0, 0],label:[0, 2, 2, 1]


epoch:0,batch:383,lr:0.001,loss:2.5966,mean_loss:2.293,mean_f1:0.084:  38%|███▍     | 384/1000 [01:38<02:41,  3.81it/s]

preds:[0, 0, 0, 0],label:[4, 12, 0, 2]


epoch:0,batch:384,lr:0.001,loss:2.2496,mean_loss:2.293,mean_f1:0.084:  38%|███▍     | 385/1000 [01:39<02:39,  3.85it/s]

preds:[0, 0, 0, 0],label:[3, 7, 2, 2]


epoch:0,batch:385,lr:0.001,loss:2.1664,mean_loss:2.293,mean_f1:0.084:  39%|███▍     | 386/1000 [01:39<02:38,  3.88it/s]

preds:[0, 0, 0, 0],label:[2, 2, 7, 2]


epoch:0,batch:386,lr:0.001,loss:2.1997,mean_loss:2.293,mean_f1:0.084:  39%|███▍     | 387/1000 [01:39<02:37,  3.90it/s]

preds:[0, 0, 0, 0],label:[3, 2, 1, 8]


epoch:0,batch:387,lr:0.001,loss:1.6842,mean_loss:2.291,mean_f1:0.084:  39%|███▍     | 388/1000 [01:39<02:35,  3.93it/s]

preds:[0, 0, 0, 0],label:[0, 2, 1, 1]


epoch:0,batch:388,lr:0.001,loss:2.4087,mean_loss:2.291,mean_f1:0.084:  39%|███▌     | 389/1000 [01:40<02:34,  3.96it/s]

preds:[0, 0, 0, 0],label:[6, 0, 6, 2]


epoch:0,batch:389,lr:0.001,loss:2.3399,mean_loss:2.292,mean_f1:0.084:  39%|███▌     | 390/1000 [01:40<02:35,  3.93it/s]

preds:[0, 0, 0, 0],label:[3, 5, 2, 4]


epoch:0,batch:390,lr:0.001,loss:1.9096,mean_loss:2.291,mean_f1:0.083:  39%|███▌     | 391/1000 [01:40<02:35,  3.90it/s]

preds:[2, 2, 2, 2],label:[1, 0, 4, 1]


epoch:0,batch:391,lr:0.001,loss:2.0585,mean_loss:2.29,mean_f1:0.083:  39%|███▉      | 392/1000 [01:40<02:33,  3.97it/s]

preds:[2, 2, 2, 2],label:[8, 1, 2, 0]


epoch:0,batch:392,lr:0.001,loss:1.7647,mean_loss:2.289,mean_f1:0.083:  39%|███▌     | 393/1000 [01:41<02:33,  3.96it/s]

preds:[2, 2, 2, 2],label:[0, 2, 1, 3]


epoch:0,batch:393,lr:0.001,loss:2.0085,mean_loss:2.288,mean_f1:0.083:  39%|███▌     | 394/1000 [01:41<02:33,  3.94it/s]

preds:[2, 2, 2, 2],label:[3, 4, 0, 1]


epoch:0,batch:394,lr:0.001,loss:2.6364,mean_loss:2.289,mean_f1:0.083:  40%|███▌     | 395/1000 [01:41<02:34,  3.91it/s]

preds:[2, 2, 2, 2],label:[2, 7, 9, 1]


epoch:0,batch:395,lr:0.001,loss:2.687,mean_loss:2.29,mean_f1:0.083:  40%|████▎      | 396/1000 [01:41<02:31,  3.98it/s]

preds:[2, 2, 2, 2],label:[2, 6, 10, 1]


epoch:0,batch:396,lr:0.001,loss:1.8493,mean_loss:2.289,mean_f1:0.084:  40%|███▌     | 397/1000 [01:42<02:28,  4.06it/s]

preds:[2, 2, 2, 2],label:[2, 4, 2, 0]


epoch:0,batch:397,lr:0.001,loss:2.45,mean_loss:2.289,mean_f1:0.084:  40%|████▍      | 398/1000 [01:42<02:27,  4.08it/s]

preds:[2, 2, 2, 2],label:[4, 4, 3, 4]


epoch:0,batch:398,lr:0.001,loss:2.4216,mean_loss:2.289,mean_f1:0.084:  40%|███▌     | 399/1000 [01:42<02:28,  4.05it/s]

preds:[2, 2, 2, 2],label:[8, 0, 5, 2]


epoch:0,batch:399,lr:0.001,loss:2.8632,mean_loss:2.291,mean_f1:0.084:  40%|███▌     | 400/1000 [01:42<02:28,  4.05it/s]

preds:[2, 2, 2, 2],label:[10, 0, 9, 2]


epoch:0,batch:400,lr:0.001,loss:2.2507,mean_loss:2.291,mean_f1:0.083:  40%|███▌     | 401/1000 [01:43<02:30,  3.97it/s]

preds:[2, 2, 2, 2],label:[1, 9, 1, 0]


epoch:0,batch:401,lr:0.001,loss:2.2304,mean_loss:2.291,mean_f1:0.083:  40%|███▌     | 402/1000 [01:43<02:31,  3.95it/s]

preds:[2, 2, 2, 2],label:[5, 2, 0, 4]


epoch:0,batch:402,lr:0.001,loss:2.3107,mean_loss:2.291,mean_f1:0.083:  40%|███▋     | 403/1000 [01:43<02:33,  3.90it/s]

preds:[2, 2, 2, 2],label:[3, 1, 5, 3]


epoch:0,batch:403,lr:0.001,loss:1.7974,mean_loss:2.289,mean_f1:0.083:  40%|███▋     | 404/1000 [01:43<02:31,  3.94it/s]

preds:[2, 2, 2, 2],label:[1, 3, 2, 0]


epoch:0,batch:404,lr:0.001,loss:2.2772,mean_loss:2.289,mean_f1:0.083:  40%|███▋     | 405/1000 [01:44<02:30,  3.95it/s]

preds:[2, 2, 2, 2],label:[1, 1, 2, 10]


epoch:0,batch:405,lr:0.001,loss:1.636,mean_loss:2.288,mean_f1:0.084:  41%|████      | 406/1000 [01:44<02:31,  3.92it/s]

preds:[2, 2, 2, 2],label:[1, 2, 2, 0]


epoch:0,batch:406,lr:0.001,loss:2.1281,mean_loss:2.287,mean_f1:0.084:  41%|███▋     | 407/1000 [01:44<02:42,  3.65it/s]

preds:[2, 2, 2, 2],label:[1, 8, 0, 0]


epoch:0,batch:407,lr:0.001,loss:2.1934,mean_loss:2.287,mean_f1:0.084:  41%|███▋     | 408/1000 [01:44<02:38,  3.73it/s]

preds:[2, 2, 2, 2],label:[3, 8, 0, 2]


epoch:0,batch:408,lr:0.001,loss:2.4419,mean_loss:2.288,mean_f1:0.083:  41%|███▋     | 409/1000 [01:45<02:36,  3.77it/s]

preds:[2, 2, 2, 2],label:[0, 4, 7, 3]


epoch:0,batch:409,lr:0.001,loss:2.0829,mean_loss:2.287,mean_f1:0.083:  41%|███▋     | 410/1000 [01:45<02:32,  3.86it/s]

preds:[2, 2, 2, 2],label:[1, 0, 7, 2]


epoch:0,batch:410,lr:0.001,loss:2.4185,mean_loss:2.287,mean_f1:0.083:  41%|███▋     | 411/1000 [01:45<02:31,  3.89it/s]

preds:[2, 2, 2, 2],label:[1, 5, 0, 5]


epoch:0,batch:411,lr:0.001,loss:2.1609,mean_loss:2.287,mean_f1:0.084:  41%|███▋     | 412/1000 [01:45<02:30,  3.91it/s]

preds:[2, 2, 2, 2],label:[2, 2, 5, 4]


epoch:0,batch:412,lr:0.001,loss:1.7748,mean_loss:2.286,mean_f1:0.084:  41%|███▋     | 413/1000 [01:46<02:28,  3.94it/s]

preds:[2, 2, 2, 2],label:[2, 3, 0, 2]


epoch:0,batch:413,lr:0.001,loss:2.4406,mean_loss:2.286,mean_f1:0.084:  41%|███▋     | 414/1000 [01:46<02:28,  3.96it/s]

preds:[2, 2, 2, 2],label:[10, 0, 4, 2]


epoch:0,batch:414,lr:0.001,loss:1.7618,mean_loss:2.285,mean_f1:0.084:  42%|███▋     | 415/1000 [01:46<02:28,  3.94it/s]

preds:[2, 2, 2, 2],label:[1, 3, 2, 2]


epoch:0,batch:415,lr:0.001,loss:2.1577,mean_loss:2.285,mean_f1:0.084:  42%|███▋     | 416/1000 [01:46<02:26,  3.99it/s]

preds:[2, 2, 2, 2],label:[2, 1, 9, 0]


epoch:0,batch:416,lr:0.001,loss:2.6588,mean_loss:2.286,mean_f1:0.084:  42%|███▊     | 417/1000 [01:47<02:26,  3.97it/s]

preds:[2, 2, 2, 2],label:[7, 2, 6, 4]


epoch:0,batch:417,lr:0.001,loss:1.6127,mean_loss:2.284,mean_f1:0.085:  42%|███▊     | 418/1000 [01:47<02:28,  3.93it/s]

preds:[2, 2, 2, 2],label:[0, 2, 0, 2]


epoch:0,batch:418,lr:0.001,loss:1.8183,mean_loss:2.283,mean_f1:0.085:  42%|███▊     | 419/1000 [01:47<02:27,  3.94it/s]

preds:[2, 2, 2, 2],label:[3, 1, 1, 2]


epoch:0,batch:419,lr:0.001,loss:1.8889,mean_loss:2.282,mean_f1:0.085:  42%|███▊     | 420/1000 [01:47<02:26,  3.95it/s]

preds:[2, 2, 2, 2],label:[1, 3, 1, 1]


epoch:0,batch:420,lr:0.001,loss:2.2924,mean_loss:2.282,mean_f1:0.085:  42%|███▊     | 421/1000 [01:48<02:26,  3.95it/s]

preds:[2, 2, 2, 2],label:[10, 1, 1, 0]


epoch:0,batch:421,lr:0.001,loss:2.4672,mean_loss:2.282,mean_f1:0.085:  42%|███▊     | 422/1000 [01:48<02:26,  3.96it/s]

preds:[2, 2, 2, 2],label:[9, 3, 4, 2]


epoch:0,batch:422,lr:0.001,loss:2.8038,mean_loss:2.284,mean_f1:0.085:  42%|███▊     | 423/1000 [01:48<02:25,  3.96it/s]

preds:[2, 2, 2, 2],label:[7, 5, 6, 2]


epoch:0,batch:423,lr:0.001,loss:2.8298,mean_loss:2.285,mean_f1:0.085:  42%|███▊     | 424/1000 [01:48<02:25,  3.96it/s]

preds:[2, 2, 2, 2],label:[2, 6, 6, 5]


epoch:0,batch:424,lr:0.001,loss:2.0072,mean_loss:2.284,mean_f1:0.085:  42%|███▊     | 425/1000 [01:49<02:23,  4.00it/s]

preds:[2, 2, 2, 2],label:[3, 1, 0, 3]


epoch:0,batch:425,lr:0.001,loss:1.8783,mean_loss:2.283,mean_f1:0.084:  43%|███▊     | 426/1000 [01:49<02:26,  3.93it/s]

preds:[2, 2, 2, 2],label:[3, 0, 1, 0]


epoch:0,batch:426,lr:0.001,loss:1.7553,mean_loss:2.282,mean_f1:0.084:  43%|███▊     | 427/1000 [01:49<02:25,  3.94it/s]

preds:[2, 2, 2, 2],label:[0, 0, 0, 1]


epoch:0,batch:427,lr:0.001,loss:2.5684,mean_loss:2.283,mean_f1:0.084:  43%|███▊     | 428/1000 [01:49<02:24,  3.95it/s]

preds:[2, 2, 2, 2],label:[5, 7, 3, 0]


epoch:0,batch:428,lr:0.001,loss:2.0562,mean_loss:2.282,mean_f1:0.084:  43%|███▊     | 429/1000 [01:50<02:23,  3.98it/s]

preds:[2, 2, 2, 2],label:[1, 0, 6, 2]


epoch:0,batch:429,lr:0.001,loss:2.0425,mean_loss:2.282,mean_f1:0.084:  43%|███▊     | 430/1000 [01:50<02:23,  3.98it/s]

preds:[2, 2, 2, 2],label:[2, 2, 8, 0]


epoch:0,batch:430,lr:0.001,loss:3.8,mean_loss:2.285,mean_f1:0.084:  43%|█████▏      | 431/1000 [01:50<02:24,  3.95it/s]

preds:[2, 2, 2, 2],label:[7, 4, 11, 11]


epoch:0,batch:431,lr:0.001,loss:2.0736,mean_loss:2.285,mean_f1:0.084:  43%|███▉     | 432/1000 [01:51<02:31,  3.76it/s]

preds:[2, 2, 2, 2],label:[7, 1, 0, 0]


epoch:0,batch:432,lr:0.001,loss:2.8106,mean_loss:2.286,mean_f1:0.084:  43%|███▉     | 433/1000 [01:51<02:28,  3.81it/s]

preds:[2, 2, 2, 2],label:[1, 2, 11, 8]


epoch:0,batch:433,lr:0.001,loss:1.6388,mean_loss:2.284,mean_f1:0.084:  43%|███▉     | 434/1000 [01:51<02:24,  3.92it/s]

preds:[2, 2, 2, 2],label:[2, 0, 0, 1]


epoch:0,batch:434,lr:0.001,loss:1.9751,mean_loss:2.284,mean_f1:0.084:  44%|███▉     | 435/1000 [01:51<02:23,  3.93it/s]

preds:[2, 2, 2, 2],label:[0, 5, 1, 0]


epoch:0,batch:435,lr:0.001,loss:2.2908,mean_loss:2.284,mean_f1:0.084:  44%|███▉     | 436/1000 [01:52<02:23,  3.93it/s]

preds:[0, 0, 0, 0],label:[9, 3, 1, 2]


epoch:0,batch:436,lr:0.001,loss:1.7629,mean_loss:2.282,mean_f1:0.084:  44%|███▉     | 437/1000 [01:52<02:22,  3.94it/s]

preds:[0, 0, 0, 0],label:[2, 0, 2, 3]


epoch:0,batch:437,lr:0.001,loss:2.4844,mean_loss:2.283,mean_f1:0.084:  44%|███▉     | 438/1000 [01:52<02:22,  3.94it/s]

preds:[0, 0, 0, 0],label:[4, 3, 8, 0]


epoch:0,batch:438,lr:0.001,loss:2.8536,mean_loss:2.284,mean_f1:0.084:  44%|███▉     | 439/1000 [01:52<02:22,  3.95it/s]

preds:[0, 0, 0, 0],label:[7, 8, 6, 2]


epoch:0,batch:439,lr:0.001,loss:1.9156,mean_loss:2.283,mean_f1:0.084:  44%|███▉     | 440/1000 [01:53<02:21,  3.95it/s]

preds:[0, 0, 0, 0],label:[0, 2, 3, 3]


epoch:0,batch:440,lr:0.001,loss:2.5006,mean_loss:2.284,mean_f1:0.084:  44%|███▉     | 441/1000 [01:53<02:20,  3.97it/s]

preds:[0, 0, 0, 0],label:[10, 4, 1, 0]


epoch:0,batch:441,lr:0.001,loss:2.0819,mean_loss:2.283,mean_f1:0.084:  44%|███▉     | 442/1000 [01:53<02:21,  3.96it/s]

preds:[0, 0, 0, 0],label:[0, 1, 1, 8]


epoch:0,batch:442,lr:0.001,loss:2.9953,mean_loss:2.285,mean_f1:0.084:  44%|███▉     | 443/1000 [01:53<02:21,  3.94it/s]

preds:[0, 0, 0, 0],label:[10, 0, 12, 1]


epoch:0,batch:443,lr:0.001,loss:2.1351,mean_loss:2.285,mean_f1:0.084:  44%|███▉     | 444/1000 [01:54<02:23,  3.87it/s]

preds:[0, 0, 0, 0],label:[1, 0, 9, 2]


epoch:0,batch:444,lr:0.001,loss:2.2012,mean_loss:2.285,mean_f1:0.084:  44%|████     | 445/1000 [01:54<02:21,  3.92it/s]

preds:[0, 0, 0, 0],label:[7, 0, 0, 4]


epoch:0,batch:445,lr:0.001,loss:2.7871,mean_loss:2.286,mean_f1:0.084:  45%|████     | 446/1000 [01:54<02:21,  3.92it/s]

preds:[0, 0, 0, 0],label:[3, 1, 8, 10]


epoch:0,batch:446,lr:0.001,loss:3.1522,mean_loss:2.288,mean_f1:0.084:  45%|████     | 447/1000 [01:54<02:20,  3.94it/s]

preds:[0, 0, 0, 0],label:[11, 6, 3, 4]


epoch:0,batch:447,lr:0.001,loss:2.2762,mean_loss:2.288,mean_f1:0.084:  45%|████     | 448/1000 [01:55<02:21,  3.90it/s]

preds:[0, 0, 0, 0],label:[8, 0, 5, 0]


epoch:0,batch:448,lr:0.001,loss:2.1026,mean_loss:2.287,mean_f1:0.085:  45%|████     | 449/1000 [01:55<02:19,  3.94it/s]

preds:[0, 0, 0, 0],label:[0, 2, 0, 10]


epoch:0,batch:449,lr:0.001,loss:2.5126,mean_loss:2.288,mean_f1:0.084:  45%|████     | 450/1000 [01:55<02:19,  3.95it/s]

preds:[0, 0, 0, 0],label:[3, 10, 3, 1]


epoch:0,batch:450,lr:0.001,loss:2.7434,mean_loss:2.289,mean_f1:0.084:  45%|████     | 451/1000 [01:55<02:19,  3.94it/s]

preds:[0, 0, 0, 0],label:[5, 7, 2, 7]


epoch:0,batch:451,lr:0.001,loss:2.6727,mean_loss:2.29,mean_f1:0.084:  45%|████▌     | 452/1000 [01:56<02:19,  3.94it/s]

preds:[0, 0, 0, 0],label:[7, 1, 8, 4]


epoch:0,batch:452,lr:0.001,loss:2.5569,mean_loss:2.29,mean_f1:0.084:  45%|████▌     | 453/1000 [01:56<02:17,  3.99it/s]

preds:[0, 0, 0, 0],label:[8, 1, 3, 8]


epoch:0,batch:453,lr:0.001,loss:2.4666,mean_loss:2.29,mean_f1:0.084:  45%|████▌     | 454/1000 [01:56<02:17,  3.98it/s]

preds:[0, 0, 0, 0],label:[0, 1, 5, 10]


epoch:0,batch:454,lr:0.001,loss:1.9135,mean_loss:2.29,mean_f1:0.084:  46%|████▌     | 455/1000 [01:56<02:16,  3.98it/s]

preds:[0, 0, 0, 0],label:[8, 1, 0, 0]


epoch:0,batch:455,lr:0.001,loss:2.3429,mean_loss:2.29,mean_f1:0.084:  46%|████▌     | 456/1000 [01:57<02:15,  4.01it/s]

preds:[0, 0, 0, 0],label:[2, 1, 4, 7]


epoch:0,batch:456,lr:0.001,loss:2.2033,mean_loss:2.29,mean_f1:0.084:  46%|████▌     | 457/1000 [01:57<02:24,  3.76it/s]

preds:[0, 0, 0, 0],label:[3, 1, 0, 6]


epoch:0,batch:457,lr:0.001,loss:2.0717,mean_loss:2.289,mean_f1:0.084:  46%|████     | 458/1000 [01:57<02:22,  3.79it/s]

preds:[0, 0, 0, 0],label:[1, 0, 9, 0]


epoch:0,batch:458,lr:0.001,loss:2.7067,mean_loss:2.29,mean_f1:0.084:  46%|████▌     | 459/1000 [01:57<02:20,  3.84it/s]

preds:[0, 0, 0, 0],label:[11, 3, 5, 0]


epoch:0,batch:459,lr:0.001,loss:3.3592,mean_loss:2.292,mean_f1:0.084:  46%|████▏    | 460/1000 [01:58<02:18,  3.91it/s]

preds:[0, 0, 0, 0],label:[1, 5, 13, 4]


epoch:0,batch:460,lr:0.001,loss:1.9908,mean_loss:2.292,mean_f1:0.084:  46%|████▏    | 461/1000 [01:58<02:15,  3.98it/s]

preds:[0, 0, 0, 0],label:[3, 2, 3, 0]


epoch:0,batch:461,lr:0.001,loss:1.9558,mean_loss:2.291,mean_f1:0.084:  46%|████▏    | 462/1000 [01:58<02:15,  3.96it/s]

preds:[0, 0, 0, 0],label:[1, 1, 2, 2]


epoch:0,batch:462,lr:0.001,loss:2.496,mean_loss:2.291,mean_f1:0.084:  46%|████▋     | 463/1000 [01:58<02:17,  3.91it/s]

preds:[0, 0, 0, 0],label:[2, 4, 1, 6]


epoch:0,batch:463,lr:0.001,loss:2.4383,mean_loss:2.292,mean_f1:0.084:  46%|████▏    | 464/1000 [01:59<02:17,  3.91it/s]

preds:[0, 0, 0, 0],label:[2, 1, 11, 1]


epoch:0,batch:464,lr:0.001,loss:2.0237,mean_loss:2.291,mean_f1:0.084:  46%|████▏    | 465/1000 [01:59<02:15,  3.95it/s]

preds:[0, 0, 0, 0],label:[0, 0, 0, 9]


epoch:0,batch:465,lr:0.001,loss:2.0232,mean_loss:2.291,mean_f1:0.085:  47%|████▏    | 466/1000 [01:59<02:19,  3.83it/s]

preds:[0, 0, 0, 0],label:[3, 3, 0, 3]


epoch:0,batch:466,lr:0.001,loss:2.5752,mean_loss:2.291,mean_f1:0.085:  47%|████▏    | 467/1000 [01:59<02:20,  3.79it/s]

preds:[0, 0, 0, 0],label:[2, 4, 2, 10]


epoch:0,batch:467,lr:0.001,loss:3.36,mean_loss:2.293,mean_f1:0.084:  47%|█████▏     | 468/1000 [02:00<02:20,  3.77it/s]

preds:[0, 0, 0, 0],label:[9, 5, 5, 11]


epoch:0,batch:468,lr:0.001,loss:2.2878,mean_loss:2.293,mean_f1:0.084:  47%|████▏    | 469/1000 [02:00<02:23,  3.70it/s]

preds:[0, 0, 0, 0],label:[3, 6, 2, 0]


epoch:0,batch:469,lr:0.001,loss:2.272,mean_loss:2.293,mean_f1:0.085:  47%|████▋     | 470/1000 [02:00<02:21,  3.74it/s]

preds:[0, 0, 0, 0],label:[0, 6, 2, 2]


epoch:0,batch:470,lr:0.001,loss:2.2527,mean_loss:2.293,mean_f1:0.085:  47%|████▏    | 471/1000 [02:01<02:19,  3.79it/s]

preds:[0, 0, 0, 0],label:[8, 8, 0, 1]


epoch:0,batch:471,lr:0.001,loss:2.2361,mean_loss:2.293,mean_f1:0.084:  47%|████▏    | 472/1000 [02:01<02:17,  3.85it/s]

preds:[0, 0, 0, 0],label:[3, 10, 1, 1]


epoch:0,batch:472,lr:0.001,loss:2.8258,mean_loss:2.294,mean_f1:0.084:  47%|████▎    | 473/1000 [02:01<02:16,  3.85it/s]

preds:[0, 0, 0, 0],label:[1, 11, 8, 8]


epoch:0,batch:473,lr:0.001,loss:1.764,mean_loss:2.293,mean_f1:0.085:  47%|████▋     | 474/1000 [02:01<02:14,  3.91it/s]

preds:[0, 0, 0, 0],label:[0, 0, 3, 1]


epoch:0,batch:474,lr:0.001,loss:2.3199,mean_loss:2.293,mean_f1:0.085:  48%|████▎    | 475/1000 [02:02<02:16,  3.85it/s]

preds:[0, 0, 0, 0],label:[0, 2, 4, 5]


epoch:0,batch:475,lr:0.001,loss:2.0232,mean_loss:2.293,mean_f1:0.085:  48%|████▎    | 476/1000 [02:02<02:15,  3.87it/s]

preds:[0, 0, 0, 0],label:[1, 7, 0, 1]


epoch:0,batch:476,lr:0.001,loss:2.3957,mean_loss:2.293,mean_f1:0.085:  48%|████▎    | 477/1000 [02:02<02:17,  3.79it/s]

preds:[0, 0, 0, 0],label:[1, 9, 8, 0]


epoch:0,batch:477,lr:0.001,loss:2.5108,mean_loss:2.293,mean_f1:0.085:  48%|████▎    | 478/1000 [02:02<02:17,  3.80it/s]

preds:[0, 0, 0, 0],label:[6, 0, 7, 3]


epoch:0,batch:478,lr:0.001,loss:2.3304,mean_loss:2.293,mean_f1:0.085:  48%|████▎    | 479/1000 [02:03<02:16,  3.81it/s]

preds:[0, 0, 0, 0],label:[1, 5, 1, 7]


epoch:0,batch:479,lr:0.001,loss:1.9804,mean_loss:2.293,mean_f1:0.085:  48%|████▎    | 480/1000 [02:03<02:17,  3.78it/s]

preds:[0, 0, 0, 0],label:[2, 0, 2, 3]


epoch:0,batch:480,lr:0.001,loss:2.1873,mean_loss:2.293,mean_f1:0.084:  48%|████▎    | 481/1000 [02:03<02:24,  3.58it/s]

preds:[0, 0, 0, 0],label:[4, 1, 3, 3]


epoch:0,batch:481,lr:0.001,loss:2.2449,mean_loss:2.292,mean_f1:0.085:  48%|████▎    | 482/1000 [02:03<02:21,  3.67it/s]

preds:[0, 0, 0, 0],label:[5, 1, 4, 0]


epoch:0,batch:482,lr:0.001,loss:2.4463,mean_loss:2.293,mean_f1:0.085:  48%|████▎    | 483/1000 [02:04<02:20,  3.69it/s]

preds:[0, 0, 0, 0],label:[2, 0, 6, 4]


epoch:0,batch:483,lr:0.001,loss:2.6619,mean_loss:2.294,mean_f1:0.085:  48%|████▎    | 484/1000 [02:04<02:16,  3.79it/s]

preds:[0, 0, 0, 0],label:[1, 0, 13, 0]


epoch:0,batch:484,lr:0.001,loss:1.8734,mean_loss:2.293,mean_f1:0.085:  48%|████▎    | 485/1000 [02:04<02:17,  3.75it/s]

preds:[0, 0, 0, 0],label:[0, 2, 0, 2]


epoch:0,batch:485,lr:0.001,loss:1.8772,mean_loss:2.292,mean_f1:0.085:  49%|████▎    | 486/1000 [02:05<02:14,  3.81it/s]

preds:[0, 0, 0, 0],label:[2, 3, 1, 0]


epoch:0,batch:486,lr:0.001,loss:2.0396,mean_loss:2.291,mean_f1:0.085:  49%|████▍    | 487/1000 [02:05<02:11,  3.90it/s]

preds:[0, 0, 0, 0],label:[1, 8, 3, 0]


epoch:0,batch:487,lr:0.001,loss:2.0652,mean_loss:2.291,mean_f1:0.086:  49%|████▍    | 488/1000 [02:05<02:12,  3.87it/s]

preds:[0, 0, 0, 0],label:[0, 0, 10, 0]


epoch:0,batch:488,lr:0.001,loss:2.1879,mean_loss:2.291,mean_f1:0.086:  49%|████▍    | 489/1000 [02:05<02:11,  3.89it/s]

preds:[0, 0, 0, 0],label:[0, 9, 2, 0]


epoch:0,batch:489,lr:0.001,loss:2.0948,mean_loss:2.29,mean_f1:0.086:  49%|████▉     | 490/1000 [02:06<02:08,  3.95it/s]

preds:[0, 0, 0, 0],label:[0, 1, 10, 1]


epoch:0,batch:490,lr:0.001,loss:1.8572,mean_loss:2.289,mean_f1:0.087:  49%|████▍    | 491/1000 [02:06<02:08,  3.96it/s]

preds:[0, 0, 0, 0],label:[3, 2, 0, 1]


epoch:0,batch:491,lr:0.001,loss:1.7549,mean_loss:2.288,mean_f1:0.087:  49%|████▍    | 492/1000 [02:06<02:08,  3.94it/s]

preds:[0, 0, 0, 0],label:[1, 0, 3, 1]


epoch:0,batch:492,lr:0.001,loss:1.9494,mean_loss:2.288,mean_f1:0.087:  49%|████▍    | 493/1000 [02:06<02:11,  3.86it/s]

preds:[0, 0, 0, 0],label:[0, 2, 3, 2]


epoch:0,batch:493,lr:0.001,loss:2.2146,mean_loss:2.287,mean_f1:0.087:  49%|████▍    | 494/1000 [02:07<02:12,  3.82it/s]

preds:[0, 0, 0, 0],label:[3, 0, 1, 9]


epoch:0,batch:494,lr:0.001,loss:3.0319,mean_loss:2.289,mean_f1:0.087:  50%|████▍    | 495/1000 [02:07<02:12,  3.80it/s]

preds:[0, 0, 0, 0],label:[8, 13, 2, 1]


epoch:0,batch:495,lr:0.001,loss:2.1091,mean_loss:2.289,mean_f1:0.087:  50%|████▍    | 496/1000 [02:07<02:12,  3.79it/s]

preds:[0, 0, 0, 0],label:[1, 6, 0, 2]


epoch:0,batch:496,lr:0.001,loss:1.8342,mean_loss:2.288,mean_f1:0.087:  50%|████▍    | 497/1000 [02:07<02:13,  3.76it/s]

preds:[0, 0, 0, 0],label:[0, 8, 0, 1]


epoch:0,batch:497,lr:0.001,loss:2.9855,mean_loss:2.289,mean_f1:0.087:  50%|████▍    | 498/1000 [02:08<02:14,  3.74it/s]

preds:[0, 0, 0, 0],label:[4, 11, 2, 4]


epoch:0,batch:498,lr:0.001,loss:2.4372,mean_loss:2.289,mean_f1:0.087:  50%|████▍    | 499/1000 [02:08<02:14,  3.73it/s]

preds:[0, 0, 0, 0],label:[8, 10, 1, 1]


epoch:0,batch:499,lr:0.001,loss:2.608,mean_loss:2.29,mean_f1:0.087:  50%|█████▌     | 500/1000 [02:08<02:13,  3.75it/s]

preds:[0, 0, 0, 0],label:[5, 0, 11, 2]


epoch:0,batch:500,lr:0.001,loss:2.5193,mean_loss:2.29,mean_f1:0.086:  50%|█████     | 501/1000 [02:08<02:13,  3.73it/s]

preds:[0, 0, 0, 0],label:[8, 6, 1, 3]


epoch:0,batch:501,lr:0.001,loss:2.0679,mean_loss:2.29,mean_f1:0.087:  50%|█████     | 502/1000 [02:09<02:14,  3.71it/s]

preds:[0, 0, 0, 0],label:[1, 0, 11, 0]


epoch:0,batch:502,lr:0.001,loss:1.9676,mean_loss:2.289,mean_f1:0.087:  50%|████▌    | 503/1000 [02:09<02:13,  3.71it/s]

preds:[0, 0, 0, 0],label:[2, 3, 3, 0]


epoch:0,batch:503,lr:0.001,loss:2.2308,mean_loss:2.289,mean_f1:0.087:  50%|████▌    | 504/1000 [02:09<02:13,  3.73it/s]

preds:[0, 0, 0, 0],label:[2, 3, 0, 6]


epoch:0,batch:504,lr:0.001,loss:2.4871,mean_loss:2.29,mean_f1:0.087:  50%|█████     | 505/1000 [02:10<02:17,  3.60it/s]

preds:[0, 0, 0, 0],label:[7, 3, 8, 0]


epoch:0,batch:505,lr:0.001,loss:2.0078,mean_loss:2.289,mean_f1:0.087:  51%|████▌    | 506/1000 [02:10<02:16,  3.61it/s]

preds:[0, 0, 0, 0],label:[0, 2, 1, 4]


epoch:0,batch:506,lr:0.001,loss:1.8853,mean_loss:2.288,mean_f1:0.087:  51%|████▌    | 507/1000 [02:10<02:12,  3.71it/s]

preds:[0, 0, 0, 0],label:[3, 1, 1, 2]


epoch:0,batch:507,lr:0.001,loss:2.8981,mean_loss:2.289,mean_f1:0.086:  51%|████▌    | 508/1000 [02:10<02:11,  3.75it/s]

preds:[0, 0, 0, 0],label:[10, 10, 8, 1]


epoch:0,batch:508,lr:0.001,loss:2.2605,mean_loss:2.289,mean_f1:0.086:  51%|████▌    | 509/1000 [02:11<02:08,  3.82it/s]

preds:[0, 0, 0, 0],label:[0, 1, 5, 8]


epoch:0,batch:509,lr:0.001,loss:1.9149,mean_loss:2.289,mean_f1:0.086:  51%|████▌    | 510/1000 [02:11<02:10,  3.76it/s]

preds:[0, 0, 0, 0],label:[2, 2, 1, 2]


epoch:0,batch:510,lr:0.001,loss:2.2469,mean_loss:2.289,mean_f1:0.086:  51%|████▌    | 511/1000 [02:11<02:10,  3.76it/s]

preds:[0, 0, 0, 0],label:[3, 6, 0, 3]


epoch:0,batch:511,lr:0.001,loss:2.4726,mean_loss:2.289,mean_f1:0.086:  51%|████▌    | 512/1000 [02:11<02:07,  3.82it/s]

preds:[0, 0, 0, 0],label:[8, 0, 9, 2]


epoch:0,batch:512,lr:0.001,loss:2.0175,mean_loss:2.288,mean_f1:0.086:  51%|████▌    | 513/1000 [02:12<02:05,  3.87it/s]

preds:[0, 0, 0, 0],label:[4, 0, 1, 2]


epoch:0,batch:513,lr:0.001,loss:2.157,mean_loss:2.288,mean_f1:0.086:  51%|█████▏    | 514/1000 [02:12<02:05,  3.88it/s]

preds:[0, 0, 0, 0],label:[1, 3, 5, 1]


epoch:0,batch:514,lr:0.001,loss:1.7974,mean_loss:2.287,mean_f1:0.087:  52%|████▋    | 515/1000 [02:12<02:04,  3.91it/s]

preds:[0, 0, 0, 0],label:[8, 0, 0, 0]


epoch:0,batch:515,lr:0.001,loss:2.5551,mean_loss:2.288,mean_f1:0.087:  52%|████▋    | 516/1000 [02:12<02:03,  3.91it/s]

preds:[0, 0, 0, 0],label:[6, 7, 0, 2]


epoch:0,batch:516,lr:0.001,loss:2.2792,mean_loss:2.288,mean_f1:0.087:  52%|████▋    | 517/1000 [02:13<02:05,  3.85it/s]

preds:[0, 0, 0, 0],label:[3, 1, 1, 9]


epoch:0,batch:517,lr:0.001,loss:2.0411,mean_loss:2.287,mean_f1:0.087:  52%|████▋    | 518/1000 [02:13<02:04,  3.88it/s]

preds:[0, 0, 0, 0],label:[4, 0, 3, 0]


epoch:0,batch:518,lr:0.001,loss:1.9342,mean_loss:2.287,mean_f1:0.087:  52%|████▋    | 519/1000 [02:13<02:04,  3.87it/s]

preds:[0, 0, 0, 0],label:[0, 8, 2, 1]


epoch:0,batch:519,lr:0.001,loss:2.1427,mean_loss:2.286,mean_f1:0.087:  52%|████▋    | 520/1000 [02:13<02:06,  3.80it/s]

preds:[0, 0, 0, 0],label:[0, 2, 3, 4]


epoch:0,batch:520,lr:0.001,loss:1.6079,mean_loss:2.285,mean_f1:0.087:  52%|████▋    | 521/1000 [02:14<02:04,  3.84it/s]

preds:[0, 0, 0, 0],label:[1, 0, 1, 1]


epoch:0,batch:521,lr:0.001,loss:1.648,mean_loss:2.284,mean_f1:0.088:  52%|█████▏    | 522/1000 [02:14<02:05,  3.82it/s]

preds:[0, 0, 0, 0],label:[0, 0, 3, 0]


epoch:0,batch:522,lr:0.001,loss:2.7515,mean_loss:2.285,mean_f1:0.088:  52%|████▋    | 523/1000 [02:14<02:05,  3.79it/s]

preds:[0, 0, 0, 0],label:[3, 5, 10, 3]


epoch:0,batch:523,lr:0.001,loss:2.8646,mean_loss:2.286,mean_f1:0.088:  52%|████▋    | 524/1000 [02:14<02:05,  3.80it/s]

preds:[0, 0, 0, 0],label:[1, 5, 4, 10]


epoch:0,batch:524,lr:0.001,loss:2.0199,mean_loss:2.285,mean_f1:0.088:  52%|████▋    | 525/1000 [02:15<02:05,  3.79it/s]

preds:[0, 0, 0, 0],label:[2, 4, 1, 0]


epoch:0,batch:525,lr:0.001,loss:2.6205,mean_loss:2.286,mean_f1:0.088:  53%|████▋    | 526/1000 [02:15<02:04,  3.80it/s]

preds:[0, 0, 0, 0],label:[6, 9, 0, 3]


epoch:0,batch:526,lr:0.001,loss:3.1963,mean_loss:2.288,mean_f1:0.088:  53%|████▋    | 527/1000 [02:15<02:03,  3.83it/s]

preds:[0, 0, 0, 0],label:[6, 8, 1, 12]


epoch:0,batch:527,lr:0.001,loss:2.8722,mean_loss:2.289,mean_f1:0.087:  53%|████▊    | 528/1000 [02:16<02:02,  3.86it/s]

preds:[0, 0, 0, 0],label:[7, 3, 3, 9]


epoch:0,batch:528,lr:0.001,loss:2.6614,mean_loss:2.289,mean_f1:0.087:  53%|████▊    | 529/1000 [02:16<02:07,  3.69it/s]

preds:[0, 0, 0, 0],label:[1, 1, 3, 13]


epoch:0,batch:529,lr:0.001,loss:2.9001,mean_loss:2.291,mean_f1:0.087:  53%|████▊    | 530/1000 [02:16<02:06,  3.71it/s]

preds:[0, 0, 0, 0],label:[5, 5, 7, 0]


epoch:0,batch:530,lr:0.001,loss:2.0248,mean_loss:2.29,mean_f1:0.087:  53%|█████▎    | 531/1000 [02:16<02:06,  3.72it/s]

preds:[0, 0, 0, 0],label:[5, 1, 1, 1]


epoch:0,batch:531,lr:0.001,loss:1.5232,mean_loss:2.289,mean_f1:0.089:  53%|████▊    | 532/1000 [02:17<02:05,  3.73it/s]

preds:[0, 0, 0, 0],label:[0, 0, 0, 0]


epoch:0,batch:532,lr:0.001,loss:3.059,mean_loss:2.29,mean_f1:0.089:  53%|█████▊     | 533/1000 [02:17<02:04,  3.74it/s]

preds:[0, 0, 0, 0],label:[6, 10, 9, 2]


epoch:0,batch:533,lr:0.001,loss:1.5812,mean_loss:2.289,mean_f1:0.089:  53%|████▊    | 534/1000 [02:17<02:05,  3.73it/s]

preds:[0, 0, 0, 0],label:[1, 1, 0, 1]


epoch:0,batch:534,lr:0.001,loss:2.1365,mean_loss:2.289,mean_f1:0.089:  54%|████▊    | 535/1000 [02:17<02:00,  3.85it/s]

preds:[0, 0, 0, 0],label:[2, 2, 0, 8]


epoch:0,batch:535,lr:0.001,loss:2.2988,mean_loss:2.289,mean_f1:0.089:  54%|████▊    | 536/1000 [02:18<02:00,  3.85it/s]

preds:[0, 0, 0, 0],label:[5, 5, 0, 0]


epoch:0,batch:536,lr:0.001,loss:2.4424,mean_loss:2.289,mean_f1:0.089:  54%|████▊    | 537/1000 [02:18<01:58,  3.90it/s]

preds:[0, 0, 0, 0],label:[8, 10, 3, 1]


epoch:0,batch:537,lr:0.001,loss:2.4865,mean_loss:2.289,mean_f1:0.089:  54%|████▊    | 538/1000 [02:18<01:57,  3.93it/s]

preds:[0, 0, 0, 0],label:[3, 8, 7, 0]


epoch:0,batch:538,lr:0.001,loss:2.36,mean_loss:2.289,mean_f1:0.089:  54%|█████▉     | 539/1000 [02:18<01:55,  4.00it/s]

preds:[0, 0, 0, 0],label:[6, 3, 3, 2]


epoch:0,batch:539,lr:0.001,loss:2.5724,mean_loss:2.29,mean_f1:0.089:  54%|█████▍    | 540/1000 [02:19<01:53,  4.05it/s]

preds:[0, 0, 0, 0],label:[2, 3, 4, 4]


epoch:0,batch:540,lr:0.001,loss:1.8489,mean_loss:2.289,mean_f1:0.089:  54%|████▊    | 541/1000 [02:19<01:54,  4.03it/s]

preds:[0, 0, 0, 0],label:[0, 2, 0, 2]


epoch:0,batch:541,lr:0.001,loss:1.7019,mean_loss:2.288,mean_f1:0.09:  54%|█████▍    | 542/1000 [02:19<01:58,  3.87it/s]

preds:[0, 0, 0, 0],label:[1, 2, 0, 0]


epoch:0,batch:542,lr:0.001,loss:2.2107,mean_loss:2.288,mean_f1:0.09:  54%|█████▍    | 543/1000 [02:19<01:56,  3.93it/s]

preds:[0, 0, 0, 0],label:[2, 0, 4, 2]


epoch:0,batch:543,lr:0.001,loss:2.065,mean_loss:2.287,mean_f1:0.09:  54%|█████▉     | 544/1000 [02:20<01:54,  3.97it/s]

preds:[0, 0, 0, 0],label:[0, 9, 1, 1]


epoch:0,batch:544,lr:0.001,loss:2.2398,mean_loss:2.287,mean_f1:0.09:  55%|█████▍    | 545/1000 [02:20<01:54,  3.99it/s]

preds:[0, 0, 0, 0],label:[0, 4, 4, 0]


epoch:0,batch:545,lr:0.001,loss:1.9645,mean_loss:2.287,mean_f1:0.091:  55%|████▉    | 546/1000 [02:20<01:54,  3.98it/s]

preds:[0, 0, 0, 0],label:[0, 2, 8, 0]


epoch:0,batch:546,lr:0.001,loss:2.1038,mean_loss:2.286,mean_f1:0.091:  55%|████▉    | 547/1000 [02:20<01:54,  3.97it/s]

preds:[0, 0, 0, 0],label:[2, 0, 6, 1]


epoch:0,batch:547,lr:0.001,loss:2.9115,mean_loss:2.288,mean_f1:0.09:  55%|█████▍    | 548/1000 [02:21<01:54,  3.95it/s]

preds:[0, 0, 0, 0],label:[1, 8, 2, 13]


epoch:0,batch:548,lr:0.001,loss:2.5361,mean_loss:2.288,mean_f1:0.09:  55%|█████▍    | 549/1000 [02:21<01:53,  3.97it/s]

preds:[0, 0, 0, 0],label:[2, 8, 3, 6]


epoch:0,batch:549,lr:0.001,loss:2.0892,mean_loss:2.288,mean_f1:0.09:  55%|█████▌    | 550/1000 [02:21<01:53,  3.96it/s]

preds:[0, 0, 0, 0],label:[1, 9, 0, 1]


epoch:0,batch:550,lr:0.001,loss:2.4823,mean_loss:2.288,mean_f1:0.091:  55%|████▉    | 551/1000 [02:21<01:52,  3.99it/s]

preds:[0, 0, 0, 0],label:[0, 2, 12, 0]


epoch:0,batch:551,lr:0.001,loss:1.9456,mean_loss:2.287,mean_f1:0.091:  55%|████▉    | 552/1000 [02:22<01:51,  4.03it/s]

preds:[0, 0, 0, 0],label:[0, 1, 4, 1]


epoch:0,batch:552,lr:0.001,loss:1.5693,mean_loss:2.286,mean_f1:0.091:  55%|████▉    | 553/1000 [02:22<01:51,  4.00it/s]

preds:[0, 0, 0, 0],label:[0, 0, 1, 1]


epoch:0,batch:553,lr:0.001,loss:2.1037,mean_loss:2.286,mean_f1:0.091:  55%|████▉    | 554/1000 [02:22<01:59,  3.74it/s]

preds:[0, 0, 0, 0],label:[2, 0, 0, 7]


epoch:0,batch:554,lr:0.001,loss:1.888,mean_loss:2.285,mean_f1:0.091:  56%|█████▌    | 555/1000 [02:22<01:57,  3.77it/s]

preds:[0, 0, 0, 0],label:[0, 1, 3, 3]


epoch:0,batch:555,lr:0.001,loss:2.4306,mean_loss:2.285,mean_f1:0.091:  56%|█████    | 556/1000 [02:23<01:55,  3.85it/s]

preds:[0, 0, 0, 0],label:[0, 9, 1, 6]


epoch:0,batch:556,lr:0.001,loss:2.3818,mean_loss:2.285,mean_f1:0.091:  56%|█████    | 557/1000 [02:23<01:52,  3.93it/s]

preds:[0, 0, 0, 0],label:[0, 4, 5, 3]


epoch:0,batch:557,lr:0.001,loss:2.5568,mean_loss:2.286,mean_f1:0.091:  56%|█████    | 558/1000 [02:23<01:51,  3.96it/s]

preds:[0, 0, 0, 0],label:[4, 5, 2, 3]


epoch:0,batch:558,lr:0.001,loss:1.633,mean_loss:2.285,mean_f1:0.091:  56%|█████▌    | 559/1000 [02:23<01:51,  3.96it/s]

preds:[0, 0, 0, 0],label:[1, 1, 1, 0]


epoch:0,batch:559,lr:0.001,loss:1.7317,mean_loss:2.284,mean_f1:0.092:  56%|█████    | 560/1000 [02:24<01:50,  3.99it/s]

preds:[0, 0, 0, 0],label:[0, 2, 3, 0]


epoch:0,batch:560,lr:0.001,loss:2.2739,mean_loss:2.284,mean_f1:0.091:  56%|█████    | 561/1000 [02:24<01:50,  3.97it/s]

preds:[0, 0, 0, 0],label:[3, 2, 5, 1]


epoch:0,batch:561,lr:0.001,loss:3.0187,mean_loss:2.285,mean_f1:0.091:  56%|█████    | 562/1000 [02:24<01:50,  3.96it/s]

preds:[0, 0, 0, 0],label:[10, 1, 7, 5]


epoch:0,batch:562,lr:0.001,loss:2.1412,mean_loss:2.285,mean_f1:0.091:  56%|█████    | 563/1000 [02:24<01:49,  3.98it/s]

preds:[0, 0, 0, 0],label:[2, 6, 2, 0]


epoch:0,batch:563,lr:0.001,loss:1.8465,mean_loss:2.284,mean_f1:0.092:  56%|█████    | 564/1000 [02:25<01:49,  3.99it/s]

preds:[0, 0, 0, 0],label:[2, 0, 2, 2]


epoch:0,batch:564,lr:0.001,loss:2.9447,mean_loss:2.285,mean_f1:0.091:  56%|█████    | 565/1000 [02:25<01:48,  4.00it/s]

preds:[0, 0, 0, 0],label:[4, 12, 2, 2]


epoch:0,batch:565,lr:0.001,loss:3.2203,mean_loss:2.287,mean_f1:0.091:  57%|█████    | 566/1000 [02:25<01:48,  4.01it/s]

preds:[0, 0, 0, 0],label:[6, 2, 12, 4]


epoch:0,batch:566,lr:0.001,loss:2.5279,mean_loss:2.287,mean_f1:0.091:  57%|█████    | 567/1000 [02:25<01:49,  3.94it/s]

preds:[0, 0, 0, 0],label:[2, 6, 7, 0]


epoch:0,batch:567,lr:0.001,loss:2.3469,mean_loss:2.287,mean_f1:0.092:  57%|█████    | 568/1000 [02:26<01:49,  3.95it/s]

preds:[0, 0, 0, 0],label:[10, 0, 5, 0]


epoch:0,batch:568,lr:0.001,loss:1.5631,mean_loss:2.286,mean_f1:0.092:  57%|█████    | 569/1000 [02:26<01:48,  3.96it/s]

preds:[0, 0, 0, 0],label:[0, 1, 1, 0]


epoch:0,batch:569,lr:0.001,loss:1.5905,mean_loss:2.285,mean_f1:0.092:  57%|█████▏   | 570/1000 [02:26<01:48,  3.95it/s]

preds:[0, 0, 0, 0],label:[0, 0, 1, 2]


epoch:0,batch:570,lr:0.001,loss:2.3194,mean_loss:2.285,mean_f1:0.092:  57%|█████▏   | 571/1000 [02:26<01:48,  3.96it/s]

preds:[0, 0, 0, 0],label:[0, 5, 2, 6]


epoch:0,batch:571,lr:0.001,loss:1.7277,mean_loss:2.284,mean_f1:0.092:  57%|█████▏   | 572/1000 [02:27<01:47,  3.96it/s]

preds:[0, 0, 0, 0],label:[0, 2, 3, 0]


epoch:0,batch:572,lr:0.001,loss:2.0119,mean_loss:2.283,mean_f1:0.093:  57%|█████▏   | 573/1000 [02:27<01:47,  3.97it/s]

preds:[0, 0, 0, 0],label:[1, 0, 9, 0]


epoch:0,batch:573,lr:0.001,loss:2.1836,mean_loss:2.283,mean_f1:0.092:  57%|█████▏   | 574/1000 [02:27<01:47,  3.97it/s]

preds:[0, 0, 0, 0],label:[4, 2, 2, 3]


epoch:0,batch:574,lr:0.001,loss:2.9699,mean_loss:2.285,mean_f1:0.092:  57%|█████▏   | 575/1000 [02:28<01:48,  3.92it/s]

preds:[0, 0, 0, 0],label:[8, 4, 9, 3]


epoch:0,batch:575,lr:0.001,loss:1.8261,mean_loss:2.284,mean_f1:0.093:  58%|█████▏   | 576/1000 [02:28<01:47,  3.94it/s]

preds:[0, 0, 0, 0],label:[1, 0, 0, 4]


epoch:0,batch:576,lr:0.001,loss:1.8165,mean_loss:2.283,mean_f1:0.092:  58%|█████▏   | 577/1000 [02:28<01:46,  3.96it/s]

preds:[0, 0, 0, 0],label:[1, 2, 1, 1]


epoch:0,batch:577,lr:0.001,loss:2.7092,mean_loss:2.284,mean_f1:0.092:  58%|█████▏   | 578/1000 [02:28<01:48,  3.91it/s]

preds:[0, 0, 0, 0],label:[6, 5, 5, 1]


epoch:0,batch:578,lr:0.001,loss:1.5388,mean_loss:2.282,mean_f1:0.093:  58%|█████▏   | 579/1000 [02:29<01:52,  3.74it/s]

preds:[0, 0, 0, 0],label:[0, 2, 0, 2]


epoch:0,batch:579,lr:0.001,loss:2.4732,mean_loss:2.283,mean_f1:0.092:  58%|█████▏   | 580/1000 [02:29<01:52,  3.73it/s]

preds:[0, 0, 0, 0],label:[4, 7, 2, 2]


epoch:0,batch:580,lr:0.001,loss:1.796,mean_loss:2.282,mean_f1:0.092:  58%|█████▊    | 581/1000 [02:29<01:49,  3.82it/s]

preds:[0, 0, 0, 0],label:[1, 1, 1, 2]


epoch:0,batch:581,lr:0.001,loss:1.7145,mean_loss:2.281,mean_f1:0.092:  58%|█████▏   | 582/1000 [02:29<01:47,  3.88it/s]

preds:[0, 0, 0, 0],label:[2, 1, 2, 2]


epoch:0,batch:582,lr:0.001,loss:1.5028,mean_loss:2.28,mean_f1:0.093:  58%|█████▊    | 583/1000 [02:30<01:47,  3.90it/s]

preds:[0, 0, 0, 0],label:[0, 0, 1, 0]


epoch:0,batch:583,lr:0.001,loss:2.5609,mean_loss:2.28,mean_f1:0.093:  58%|█████▊    | 584/1000 [02:30<01:45,  3.93it/s]

preds:[0, 0, 0, 0],label:[10, 2, 0, 9]


epoch:0,batch:584,lr:0.001,loss:1.9333,mean_loss:2.279,mean_f1:0.093:  58%|█████▎   | 585/1000 [02:30<01:46,  3.91it/s]

preds:[0, 0, 0, 0],label:[2, 1, 2, 4]


epoch:0,batch:585,lr:0.001,loss:2.0256,mean_loss:2.279,mean_f1:0.093:  59%|█████▎   | 586/1000 [02:30<01:44,  3.97it/s]

preds:[0, 0, 0, 0],label:[0, 2, 2, 9]


epoch:0,batch:586,lr:0.001,loss:1.878,mean_loss:2.278,mean_f1:0.093:  59%|█████▊    | 587/1000 [02:31<01:44,  3.97it/s]

preds:[0, 0, 0, 0],label:[5, 2, 2, 0]


epoch:0,batch:587,lr:0.001,loss:1.6134,mean_loss:2.277,mean_f1:0.093:  59%|█████▎   | 588/1000 [02:31<01:44,  3.95it/s]

preds:[0, 0, 0, 0],label:[0, 1, 0, 1]


epoch:0,batch:588,lr:0.001,loss:2.1632,mean_loss:2.277,mean_f1:0.093:  59%|█████▎   | 589/1000 [02:31<01:43,  3.97it/s]

preds:[0, 0, 0, 0],label:[2, 4, 0, 5]


epoch:0,batch:589,lr:0.001,loss:2.408,mean_loss:2.277,mean_f1:0.093:  59%|█████▉    | 590/1000 [02:31<01:42,  4.00it/s]

preds:[0, 0, 0, 0],label:[5, 1, 2, 9]


epoch:0,batch:590,lr:0.001,loss:2.161,mean_loss:2.277,mean_f1:0.093:  59%|█████▉    | 591/1000 [02:32<01:42,  3.99it/s]

preds:[2, 2, 2, 2],label:[2, 4, 1, 3]


epoch:0,batch:591,lr:0.001,loss:2.0505,mean_loss:2.277,mean_f1:0.093:  59%|█████▎   | 592/1000 [02:32<01:42,  3.98it/s]

preds:[2, 2, 2, 2],label:[1, 5, 1, 1]


epoch:0,batch:592,lr:0.001,loss:2.1498,mean_loss:2.276,mean_f1:0.093:  59%|█████▎   | 593/1000 [02:32<01:44,  3.88it/s]

preds:[2, 2, 2, 2],label:[1, 1, 9, 1]


epoch:0,batch:593,lr:0.001,loss:2.4006,mean_loss:2.277,mean_f1:0.093:  59%|█████▎   | 594/1000 [02:32<01:43,  3.92it/s]

preds:[2, 2, 2, 2],label:[0, 7, 0, 3]


epoch:0,batch:594,lr:0.001,loss:2.292,mean_loss:2.277,mean_f1:0.092:  60%|█████▉    | 595/1000 [02:33<01:42,  3.93it/s]

preds:[2, 2, 2, 2],label:[6, 0, 5, 0]


epoch:0,batch:595,lr:0.001,loss:3.2432,mean_loss:2.278,mean_f1:0.092:  60%|█████▎   | 596/1000 [02:33<01:41,  3.97it/s]

preds:[2, 2, 2, 2],label:[11, 4, 3, 4]


epoch:0,batch:596,lr:0.001,loss:1.552,mean_loss:2.277,mean_f1:0.093:  60%|█████▉    | 597/1000 [02:33<01:41,  3.97it/s]

preds:[2, 2, 2, 2],label:[2, 2, 1, 1]


epoch:0,batch:597,lr:0.001,loss:2.186,mean_loss:2.277,mean_f1:0.092:  60%|█████▉    | 598/1000 [02:33<01:40,  3.98it/s]

preds:[2, 2, 2, 2],label:[4, 1, 1, 5]


epoch:0,batch:598,lr:0.001,loss:1.8919,mean_loss:2.276,mean_f1:0.092:  60%|█████▍   | 599/1000 [02:34<01:40,  4.00it/s]

preds:[2, 2, 2, 2],label:[0, 0, 0, 3]


epoch:0,batch:599,lr:0.001,loss:1.8442,mean_loss:2.276,mean_f1:0.092:  60%|█████▍   | 600/1000 [02:34<01:41,  3.96it/s]

preds:[2, 2, 2, 2],label:[0, 4, 0, 2]


epoch:0,batch:600,lr:0.001,loss:2.7506,mean_loss:2.276,mean_f1:0.092:  60%|█████▍   | 601/1000 [02:34<01:41,  3.95it/s]

preds:[2, 2, 2, 2],label:[3, 4, 3, 3]


epoch:0,batch:601,lr:0.001,loss:1.8599,mean_loss:2.276,mean_f1:0.092:  60%|█████▍   | 602/1000 [02:34<01:40,  3.97it/s]

preds:[2, 2, 2, 2],label:[2, 1, 0, 3]


epoch:0,batch:602,lr:0.001,loss:2.3774,mean_loss:2.276,mean_f1:0.092:  60%|█████▍   | 603/1000 [02:35<01:40,  3.96it/s]

preds:[2, 2, 2, 2],label:[10, 1, 4, 1]


epoch:0,batch:603,lr:0.001,loss:2.4317,mean_loss:2.276,mean_f1:0.092:  60%|█████▍   | 604/1000 [02:35<01:46,  3.72it/s]

preds:[1, 1, 1, 1],label:[1, 3, 3, 5]


epoch:0,batch:604,lr:0.001,loss:1.5424,mean_loss:2.275,mean_f1:0.093:  60%|█████▍   | 605/1000 [02:35<01:44,  3.79it/s]

preds:[1, 1, 1, 1],label:[1, 1, 0, 1]


epoch:0,batch:605,lr:0.001,loss:2.5801,mean_loss:2.275,mean_f1:0.093:  61%|█████▍   | 606/1000 [02:35<01:43,  3.82it/s]

preds:[1, 1, 1, 1],label:[3, 5, 6, 2]


epoch:0,batch:606,lr:0.001,loss:2.6725,mean_loss:2.276,mean_f1:0.092:  61%|█████▍   | 607/1000 [02:36<01:41,  3.87it/s]

preds:[1, 1, 1, 1],label:[0, 3, 3, 7]


epoch:0,batch:607,lr:0.001,loss:1.6013,mean_loss:2.275,mean_f1:0.092:  61%|█████▍   | 608/1000 [02:36<01:41,  3.87it/s]

preds:[1, 1, 1, 1],label:[2, 1, 0, 0]


epoch:0,batch:608,lr:0.001,loss:2.8307,mean_loss:2.276,mean_f1:0.092:  61%|█████▍   | 609/1000 [02:36<01:41,  3.84it/s]

preds:[1, 1, 1, 1],label:[11, 6, 0, 0]


epoch:0,batch:609,lr:0.001,loss:2.9869,mean_loss:2.277,mean_f1:0.092:  61%|█████▍   | 610/1000 [02:36<01:40,  3.88it/s]

preds:[1, 1, 1, 1],label:[6, 0, 8, 6]


epoch:0,batch:610,lr:0.001,loss:3.5749,mean_loss:2.279,mean_f1:0.092:  61%|█████▍   | 611/1000 [02:37<01:38,  3.93it/s]

preds:[1, 1, 1, 1],label:[4, 5, 13, 8]


epoch:0,batch:611,lr:0.001,loss:2.753,mean_loss:2.28,mean_f1:0.092:  61%|██████▋    | 612/1000 [02:37<01:38,  3.94it/s]

preds:[1, 1, 1, 1],label:[5, 2, 5, 10]


epoch:0,batch:612,lr:0.001,loss:2.1197,mean_loss:2.28,mean_f1:0.092:  61%|██████▏   | 613/1000 [02:37<01:37,  3.97it/s]

preds:[1, 1, 1, 1],label:[0, 5, 2, 3]


epoch:0,batch:613,lr:0.001,loss:1.7854,mean_loss:2.279,mean_f1:0.092:  61%|█████▌   | 614/1000 [02:37<01:35,  4.03it/s]

preds:[1, 1, 1, 1],label:[1, 4, 1, 0]


epoch:0,batch:614,lr:0.001,loss:2.2788,mean_loss:2.279,mean_f1:0.092:  62%|█████▌   | 615/1000 [02:38<01:35,  4.02it/s]

preds:[1, 1, 1, 1],label:[2, 6, 0, 4]


epoch:0,batch:615,lr:0.001,loss:1.9902,mean_loss:2.278,mean_f1:0.092:  62%|█████▌   | 616/1000 [02:38<01:36,  3.97it/s]

preds:[1, 1, 1, 1],label:[6, 1, 1, 2]


epoch:0,batch:616,lr:0.001,loss:2.4035,mean_loss:2.279,mean_f1:0.092:  62%|█████▌   | 617/1000 [02:38<01:37,  3.94it/s]

preds:[1, 1, 1, 1],label:[3, 0, 7, 2]


epoch:0,batch:617,lr:0.001,loss:2.0483,mean_loss:2.278,mean_f1:0.092:  62%|█████▌   | 618/1000 [02:38<01:36,  3.95it/s]

preds:[1, 1, 1, 1],label:[1, 2, 4, 3]


epoch:0,batch:618,lr:0.001,loss:2.0345,mean_loss:2.278,mean_f1:0.092:  62%|█████▌   | 619/1000 [02:39<01:36,  3.94it/s]

preds:[1, 1, 1, 1],label:[6, 0, 0, 2]


epoch:0,batch:619,lr:0.001,loss:2.6407,mean_loss:2.278,mean_f1:0.092:  62%|█████▌   | 620/1000 [02:39<01:36,  3.93it/s]

preds:[1, 1, 1, 1],label:[3, 13, 1, 0]


epoch:0,batch:620,lr:0.001,loss:2.827,mean_loss:2.279,mean_f1:0.092:  62%|██████▏   | 621/1000 [02:39<01:36,  3.94it/s]

preds:[1, 1, 1, 1],label:[6, 0, 0, 13]


epoch:0,batch:621,lr:0.001,loss:2.233,mean_loss:2.279,mean_f1:0.092:  62%|██████▏   | 622/1000 [02:39<01:35,  3.94it/s]

preds:[1, 1, 1, 1],label:[3, 5, 1, 4]


epoch:0,batch:622,lr:0.001,loss:2.2287,mean_loss:2.279,mean_f1:0.092:  62%|█████▌   | 623/1000 [02:40<01:35,  3.95it/s]

preds:[1, 1, 1, 1],label:[2, 2, 8, 1]


epoch:0,batch:623,lr:0.001,loss:1.5974,mean_loss:2.278,mean_f1:0.092:  62%|█████▌   | 624/1000 [02:40<01:35,  3.94it/s]

preds:[1, 1, 1, 1],label:[0, 0, 1, 1]


epoch:0,batch:624,lr:0.001,loss:2.0007,mean_loss:2.278,mean_f1:0.092:  62%|█████▋   | 625/1000 [02:40<01:35,  3.93it/s]

preds:[0, 0, 0, 0],label:[3, 1, 5, 0]


epoch:0,batch:625,lr:0.001,loss:2.1465,mean_loss:2.277,mean_f1:0.092:  63%|█████▋   | 626/1000 [02:41<01:34,  3.95it/s]

preds:[0, 0, 0, 0],label:[3, 3, 4, 0]


epoch:0,batch:626,lr:0.001,loss:2.9224,mean_loss:2.278,mean_f1:0.092:  63%|█████▋   | 627/1000 [02:41<01:34,  3.93it/s]

preds:[0, 0, 0, 0],label:[0, 11, 0, 7]


epoch:0,batch:627,lr:0.001,loss:2.6391,mean_loss:2.279,mean_f1:0.092:  63%|█████▋   | 628/1000 [02:41<01:37,  3.80it/s]

preds:[0, 0, 0, 0],label:[2, 4, 8, 4]


epoch:0,batch:628,lr:0.001,loss:3.0025,mean_loss:2.28,mean_f1:0.092:  63%|██████▎   | 629/1000 [02:41<01:39,  3.72it/s]

preds:[0, 0, 0, 0],label:[6, 0, 6, 11]


epoch:0,batch:629,lr:0.001,loss:2.7651,mean_loss:2.281,mean_f1:0.092:  63%|█████▋   | 630/1000 [02:42<01:37,  3.80it/s]

preds:[0, 0, 0, 0],label:[1, 6, 11, 2]


epoch:0,batch:630,lr:0.001,loss:2.5229,mean_loss:2.281,mean_f1:0.092:  63%|█████▋   | 631/1000 [02:42<01:35,  3.86it/s]

preds:[0, 0, 0, 0],label:[3, 3, 2, 7]


epoch:0,batch:631,lr:0.001,loss:3.125,mean_loss:2.283,mean_f1:0.092:  63%|██████▎   | 632/1000 [02:42<01:34,  3.89it/s]

preds:[0, 0, 0, 0],label:[4, 12, 8, 0]


epoch:0,batch:632,lr:0.001,loss:2.4631,mean_loss:2.283,mean_f1:0.092:  63%|█████▋   | 633/1000 [02:42<01:34,  3.90it/s]

preds:[0, 0, 0, 0],label:[2, 4, 7, 1]


epoch:0,batch:633,lr:0.001,loss:2.4808,mean_loss:2.283,mean_f1:0.092:  63%|█████▋   | 634/1000 [02:43<01:33,  3.90it/s]

preds:[0, 0, 0, 0],label:[9, 2, 1, 4]


epoch:0,batch:634,lr:0.001,loss:1.788,mean_loss:2.282,mean_f1:0.092:  64%|██████▎   | 635/1000 [02:43<01:33,  3.91it/s]

preds:[0, 0, 0, 0],label:[1, 0, 1, 3]


epoch:0,batch:635,lr:0.001,loss:2.8266,mean_loss:2.283,mean_f1:0.092:  64%|█████▋   | 636/1000 [02:43<01:32,  3.93it/s]

preds:[0, 0, 0, 0],label:[10, 1, 7, 2]


epoch:0,batch:636,lr:0.001,loss:2.1821,mean_loss:2.283,mean_f1:0.091:  64%|█████▋   | 637/1000 [02:43<01:32,  3.93it/s]

preds:[0, 0, 0, 0],label:[1, 2, 4, 5]


epoch:0,batch:637,lr:0.001,loss:2.0263,mean_loss:2.283,mean_f1:0.092:  64%|█████▋   | 638/1000 [02:44<01:31,  3.97it/s]

preds:[0, 0, 0, 0],label:[0, 0, 3, 6]


epoch:0,batch:638,lr:0.001,loss:2.9749,mean_loss:2.284,mean_f1:0.092:  64%|█████▊   | 639/1000 [02:44<01:29,  4.01it/s]

preds:[0, 0, 0, 0],label:[12, 0, 7, 2]


epoch:0,batch:639,lr:0.001,loss:2.1469,mean_loss:2.284,mean_f1:0.092:  64%|█████▊   | 640/1000 [02:44<01:30,  3.98it/s]

preds:[0, 0, 0, 0],label:[5, 4, 0, 2]


epoch:0,batch:640,lr:0.001,loss:2.7175,mean_loss:2.284,mean_f1:0.092:  64%|█████▊   | 641/1000 [02:44<01:31,  3.93it/s]

preds:[0, 0, 0, 0],label:[1, 9, 7, 1]


epoch:0,batch:641,lr:0.001,loss:2.145,mean_loss:2.284,mean_f1:0.092:  64%|██████▍   | 642/1000 [02:45<01:33,  3.83it/s]

preds:[0, 0, 0, 0],label:[7, 2, 0, 0]


epoch:0,batch:642,lr:0.001,loss:2.1141,mean_loss:2.284,mean_f1:0.092:  64%|█████▊   | 643/1000 [02:45<01:31,  3.90it/s]

preds:[0, 0, 0, 0],label:[0, 4, 4, 4]


epoch:0,batch:643,lr:0.001,loss:2.4115,mean_loss:2.284,mean_f1:0.092:  64%|█████▊   | 644/1000 [02:45<01:31,  3.90it/s]

preds:[0, 0, 0, 0],label:[5, 0, 7, 2]


epoch:0,batch:644,lr:0.001,loss:2.2099,mean_loss:2.284,mean_f1:0.092:  64%|█████▊   | 645/1000 [02:45<01:29,  3.95it/s]

preds:[0, 0, 0, 0],label:[0, 8, 1, 2]


epoch:0,batch:645,lr:0.001,loss:2.0477,mean_loss:2.283,mean_f1:0.092:  65%|█████▊   | 646/1000 [02:46<01:30,  3.92it/s]

preds:[0, 0, 0, 0],label:[0, 4, 5, 0]


epoch:0,batch:646,lr:0.001,loss:2.9052,mean_loss:2.284,mean_f1:0.092:  65%|█████▊   | 647/1000 [02:46<01:29,  3.95it/s]

preds:[0, 0, 0, 0],label:[7, 9, 1, 5]


epoch:0,batch:647,lr:0.001,loss:2.5903,mean_loss:2.285,mean_f1:0.092:  65%|█████▊   | 648/1000 [02:46<01:28,  3.96it/s]

preds:[0, 0, 0, 0],label:[7, 6, 4, 4]


epoch:0,batch:648,lr:0.001,loss:2.9224,mean_loss:2.286,mean_f1:0.092:  65%|█████▊   | 649/1000 [02:46<01:28,  3.97it/s]

preds:[0, 0, 0, 0],label:[8, 5, 1, 8]


epoch:0,batch:649,lr:0.001,loss:2.3677,mean_loss:2.286,mean_f1:0.092:  65%|█████▊   | 650/1000 [02:47<01:28,  3.94it/s]

preds:[0, 0, 0, 0],label:[7, 2, 6, 0]


epoch:0,batch:650,lr:0.001,loss:1.9665,mean_loss:2.286,mean_f1:0.092:  65%|█████▊   | 651/1000 [02:47<01:26,  4.01it/s]

preds:[0, 0, 0, 0],label:[2, 5, 0, 0]


epoch:0,batch:651,lr:0.001,loss:2.4507,mean_loss:2.286,mean_f1:0.092:  65%|█████▊   | 652/1000 [02:47<01:26,  4.03it/s]

preds:[0, 0, 0, 0],label:[6, 9, 0, 0]


epoch:0,batch:652,lr:0.001,loss:2.3826,mean_loss:2.286,mean_f1:0.092:  65%|█████▉   | 653/1000 [02:47<01:29,  3.87it/s]

preds:[0, 0, 0, 0],label:[3, 5, 7, 0]


epoch:0,batch:653,lr:0.001,loss:1.9967,mean_loss:2.285,mean_f1:0.092:  65%|█████▉   | 654/1000 [02:48<01:30,  3.82it/s]

preds:[0, 0, 0, 0],label:[2, 1, 4, 1]


epoch:0,batch:654,lr:0.001,loss:1.8694,mean_loss:2.285,mean_f1:0.092:  66%|█████▉   | 655/1000 [02:48<01:29,  3.87it/s]

preds:[0, 0, 0, 0],label:[1, 0, 1, 2]


epoch:0,batch:655,lr:0.001,loss:2.041,mean_loss:2.284,mean_f1:0.092:  66%|██████▌   | 656/1000 [02:48<01:28,  3.87it/s]

preds:[0, 0, 0, 0],label:[1, 0, 1, 5]


epoch:0,batch:656,lr:0.001,loss:2.114,mean_loss:2.284,mean_f1:0.092:  66%|██████▌   | 657/1000 [02:48<01:28,  3.89it/s]

preds:[0, 0, 0, 0],label:[2, 1, 3, 2]


epoch:0,batch:657,lr:0.001,loss:2.5834,mean_loss:2.285,mean_f1:0.092:  66%|█████▉   | 658/1000 [02:49<01:27,  3.91it/s]

preds:[0, 0, 0, 0],label:[8, 1, 3, 3]


epoch:0,batch:658,lr:0.001,loss:1.9617,mean_loss:2.284,mean_f1:0.092:  66%|█████▉   | 659/1000 [02:49<01:26,  3.96it/s]

preds:[0, 0, 0, 0],label:[1, 2, 2, 1]


epoch:0,batch:659,lr:0.001,loss:1.9172,mean_loss:2.284,mean_f1:0.092:  66%|█████▉   | 660/1000 [02:49<01:25,  4.00it/s]

preds:[0, 0, 0, 0],label:[1, 1, 1, 2]


epoch:0,batch:660,lr:0.001,loss:2.6383,mean_loss:2.284,mean_f1:0.092:  66%|█████▉   | 661/1000 [02:49<01:24,  4.01it/s]

preds:[0, 0, 0, 0],label:[10, 2, 6, 0]


epoch:0,batch:661,lr:0.001,loss:2.4512,mean_loss:2.284,mean_f1:0.092:  66%|█████▉   | 662/1000 [02:50<01:23,  4.04it/s]

preds:[0, 0, 0, 0],label:[8, 5, 0, 4]


epoch:0,batch:662,lr:0.001,loss:2.9648,mean_loss:2.285,mean_f1:0.092:  66%|█████▉   | 663/1000 [02:50<01:23,  4.04it/s]

preds:[0, 0, 0, 0],label:[9, 5, 10, 0]


epoch:0,batch:663,lr:0.001,loss:2.452,mean_loss:2.286,mean_f1:0.091:  66%|██████▋   | 664/1000 [02:50<01:23,  4.02it/s]

preds:[0, 0, 0, 0],label:[7, 8, 1, 2]


epoch:0,batch:664,lr:0.001,loss:1.9941,mean_loss:2.285,mean_f1:0.092:  66%|█████▉   | 665/1000 [02:50<01:25,  3.93it/s]

preds:[0, 0, 0, 0],label:[0, 0, 6, 1]


epoch:0,batch:665,lr:0.001,loss:2.661,mean_loss:2.286,mean_f1:0.092:  67%|██████▋   | 666/1000 [02:51<01:24,  3.95it/s]

preds:[0, 0, 0, 0],label:[1, 5, 0, 12]


epoch:0,batch:666,lr:0.001,loss:2.3258,mean_loss:2.286,mean_f1:0.092:  67%|██████   | 667/1000 [02:51<01:23,  3.98it/s]

preds:[0, 0, 0, 0],label:[0, 1, 11, 0]


epoch:0,batch:667,lr:0.001,loss:1.8022,mean_loss:2.285,mean_f1:0.092:  67%|██████   | 668/1000 [02:51<01:24,  3.95it/s]

preds:[0, 0, 0, 0],label:[2, 0, 1, 2]


epoch:0,batch:668,lr:0.001,loss:2.4878,mean_loss:2.285,mean_f1:0.092:  67%|██████   | 669/1000 [02:51<01:24,  3.93it/s]

preds:[0, 0, 0, 0],label:[7, 6, 0, 3]


epoch:0,batch:669,lr:0.001,loss:2.2982,mean_loss:2.285,mean_f1:0.092:  67%|██████   | 670/1000 [02:52<01:23,  3.93it/s]

preds:[0, 0, 0, 0],label:[0, 4, 3, 3]


epoch:0,batch:670,lr:0.001,loss:2.0732,mean_loss:2.285,mean_f1:0.092:  67%|██████   | 671/1000 [02:52<01:24,  3.91it/s]

preds:[0, 0, 0, 0],label:[0, 3, 0, 5]


epoch:0,batch:671,lr:0.001,loss:2.5151,mean_loss:2.286,mean_f1:0.092:  67%|██████   | 672/1000 [02:52<01:25,  3.85it/s]

preds:[0, 0, 0, 0],label:[1, 2, 7, 9]


epoch:0,batch:672,lr:0.001,loss:2.4817,mean_loss:2.286,mean_f1:0.092:  67%|██████   | 673/1000 [02:53<01:25,  3.82it/s]

preds:[0, 0, 0, 0],label:[8, 5, 0, 3]


epoch:0,batch:673,lr:0.001,loss:1.791,mean_loss:2.285,mean_f1:0.092:  67%|██████▋   | 674/1000 [02:53<01:25,  3.80it/s]

preds:[0, 0, 0, 0],label:[2, 2, 0, 1]


epoch:0,batch:674,lr:0.001,loss:3.2643,mean_loss:2.287,mean_f1:0.092:  68%|██████   | 675/1000 [02:53<01:24,  3.85it/s]

preds:[0, 0, 0, 0],label:[9, 8, 8, 6]


epoch:0,batch:675,lr:0.001,loss:2.8598,mean_loss:2.287,mean_f1:0.092:  68%|██████   | 676/1000 [02:53<01:24,  3.85it/s]

preds:[0, 0, 0, 0],label:[10, 6, 2, 3]


epoch:0,batch:676,lr:0.001,loss:2.4523,mean_loss:2.288,mean_f1:0.092:  68%|██████   | 677/1000 [02:54<01:23,  3.86it/s]

preds:[0, 0, 0, 0],label:[4, 6, 3, 1]


epoch:0,batch:677,lr:0.001,loss:3.3417,mean_loss:2.289,mean_f1:0.092:  68%|██████   | 678/1000 [02:54<01:26,  3.72it/s]

preds:[0, 0, 0, 0],label:[3, 9, 4, 12]


epoch:0,batch:678,lr:0.001,loss:2.1008,mean_loss:2.289,mean_f1:0.092:  68%|██████   | 679/1000 [02:54<01:25,  3.76it/s]

preds:[0, 0, 0, 0],label:[4, 3, 0, 1]


epoch:0,batch:679,lr:0.001,loss:2.391,mean_loss:2.289,mean_f1:0.091:  68%|██████▊   | 680/1000 [02:54<01:24,  3.81it/s]

preds:[0, 0, 0, 0],label:[1, 3, 5, 7]


epoch:0,batch:680,lr:0.001,loss:2.0264,mean_loss:2.289,mean_f1:0.092:  68%|██████▏  | 681/1000 [02:55<01:23,  3.83it/s]

preds:[0, 0, 0, 0],label:[3, 1, 3, 0]


epoch:0,batch:681,lr:0.001,loss:2.2114,mean_loss:2.289,mean_f1:0.092:  68%|██████▏  | 682/1000 [02:55<01:22,  3.84it/s]

preds:[0, 0, 0, 0],label:[6, 1, 4, 0]


epoch:0,batch:682,lr:0.001,loss:2.6267,mean_loss:2.289,mean_f1:0.092:  68%|██████▏  | 683/1000 [02:55<01:21,  3.88it/s]

preds:[0, 0, 0, 0],label:[6, 11, 0, 1]


epoch:0,batch:683,lr:0.001,loss:2.8083,mean_loss:2.29,mean_f1:0.091:  68%|██████▊   | 684/1000 [02:55<01:21,  3.89it/s]

preds:[0, 0, 0, 0],label:[6, 6, 10, 1]


epoch:0,batch:684,lr:0.001,loss:2.4562,mean_loss:2.29,mean_f1:0.091:  68%|██████▊   | 685/1000 [02:56<01:20,  3.91it/s]

preds:[0, 0, 0, 0],label:[1, 6, 6, 5]


epoch:0,batch:685,lr:0.001,loss:2.1723,mean_loss:2.29,mean_f1:0.091:  69%|██████▊   | 686/1000 [02:56<01:19,  3.96it/s]

preds:[0, 0, 0, 0],label:[1, 5, 1, 6]


epoch:0,batch:686,lr:0.001,loss:2.4886,mean_loss:2.29,mean_f1:0.091:  69%|██████▊   | 687/1000 [02:56<01:19,  3.95it/s]

preds:[1, 1, 1, 1],label:[5, 4, 2, 5]


epoch:0,batch:687,lr:0.001,loss:2.2491,mean_loss:2.29,mean_f1:0.091:  69%|██████▉   | 688/1000 [02:56<01:18,  3.96it/s]

preds:[1, 1, 1, 1],label:[1, 2, 4, 5]


epoch:0,batch:688,lr:0.001,loss:2.5784,mean_loss:2.291,mean_f1:0.091:  69%|██████▏  | 689/1000 [02:57<01:19,  3.91it/s]

preds:[1, 1, 1, 1],label:[4, 1, 1, 11]


epoch:0,batch:689,lr:0.001,loss:2.9545,mean_loss:2.291,mean_f1:0.091:  69%|██████▏  | 690/1000 [02:57<01:19,  3.91it/s]

preds:[1, 1, 1, 1],label:[0, 13, 5, 6]


epoch:0,batch:690,lr:0.001,loss:2.3102,mean_loss:2.291,mean_f1:0.091:  69%|██████▏  | 691/1000 [02:57<01:20,  3.86it/s]

preds:[1, 1, 1, 1],label:[2, 3, 7, 0]


epoch:0,batch:691,lr:0.001,loss:1.9303,mean_loss:2.291,mean_f1:0.091:  69%|██████▏  | 692/1000 [02:57<01:19,  3.88it/s]

preds:[1, 1, 1, 1],label:[2, 2, 1, 0]


epoch:0,batch:692,lr:0.001,loss:3.2349,mean_loss:2.292,mean_f1:0.091:  69%|██████▏  | 693/1000 [02:58<01:17,  3.94it/s]

preds:[1, 1, 1, 1],label:[13, 3, 3, 9]


epoch:0,batch:693,lr:0.001,loss:3.1668,mean_loss:2.294,mean_f1:0.091:  69%|██████▏  | 694/1000 [02:58<01:16,  3.98it/s]

preds:[1, 1, 1, 1],label:[12, 4, 3, 8]


epoch:0,batch:694,lr:0.001,loss:2.7699,mean_loss:2.294,mean_f1:0.091:  70%|██████▎  | 695/1000 [02:58<01:16,  3.97it/s]

preds:[1, 1, 1, 1],label:[12, 5, 3, 0]


epoch:0,batch:695,lr:0.001,loss:2.4232,mean_loss:2.294,mean_f1:0.091:  70%|██████▎  | 696/1000 [02:58<01:18,  3.87it/s]

preds:[1, 1, 1, 1],label:[4, 8, 3, 1]


epoch:0,batch:696,lr:0.001,loss:2.4545,mean_loss:2.295,mean_f1:0.091:  70%|██████▎  | 697/1000 [02:59<01:18,  3.88it/s]

preds:[1, 1, 1, 1],label:[3, 8, 3, 3]


epoch:0,batch:697,lr:0.001,loss:1.9029,mean_loss:2.294,mean_f1:0.091:  70%|██████▎  | 698/1000 [02:59<01:16,  3.94it/s]

preds:[1, 1, 1, 1],label:[1, 3, 1, 3]


epoch:0,batch:698,lr:0.001,loss:2.5497,mean_loss:2.295,mean_f1:0.091:  70%|██████▎  | 699/1000 [02:59<01:15,  3.97it/s]

preds:[1, 1, 1, 1],label:[4, 2, 0, 9]


epoch:0,batch:699,lr:0.001,loss:2.0509,mean_loss:2.294,mean_f1:0.091:  70%|██████▎  | 700/1000 [02:59<01:14,  4.00it/s]

preds:[1, 1, 1, 1],label:[0, 1, 0, 4]


epoch:0,batch:700,lr:0.001,loss:2.6179,mean_loss:2.295,mean_f1:0.091:  70%|██████▎  | 701/1000 [03:00<01:14,  3.99it/s]

preds:[1, 1, 1, 1],label:[2, 0, 6, 10]


epoch:0,batch:701,lr:0.001,loss:1.8896,mean_loss:2.294,mean_f1:0.091:  70%|██████▎  | 702/1000 [03:00<01:13,  4.03it/s]

preds:[1, 1, 1, 1],label:[1, 1, 4, 1]


epoch:0,batch:702,lr:0.001,loss:1.8324,mean_loss:2.293,mean_f1:0.091:  70%|██████▎  | 703/1000 [03:00<01:17,  3.85it/s]

preds:[1, 1, 1, 1],label:[0, 1, 1, 0]


epoch:0,batch:703,lr:0.001,loss:2.2812,mean_loss:2.293,mean_f1:0.091:  70%|██████▎  | 704/1000 [03:00<01:16,  3.87it/s]

preds:[1, 1, 1, 1],label:[0, 7, 1, 2]


epoch:0,batch:704,lr:0.001,loss:2.08,mean_loss:2.293,mean_f1:0.092:  70%|███████▊   | 705/1000 [03:01<01:16,  3.84it/s]

preds:[1, 1, 1, 1],label:[1, 2, 0, 5]


epoch:0,batch:705,lr:0.001,loss:1.8644,mean_loss:2.292,mean_f1:0.092:  71%|██████▎  | 706/1000 [03:01<01:16,  3.83it/s]

preds:[1, 1, 1, 1],label:[0, 1, 2, 1]


epoch:0,batch:706,lr:0.001,loss:3.0759,mean_loss:2.294,mean_f1:0.092:  71%|██████▎  | 707/1000 [03:01<01:15,  3.88it/s]

preds:[1, 1, 1, 1],label:[4, 1, 10, 12]


epoch:0,batch:707,lr:0.001,loss:2.5592,mean_loss:2.294,mean_f1:0.092:  71%|██████▎  | 708/1000 [03:02<01:15,  3.86it/s]

preds:[1, 1, 1, 1],label:[6, 2, 2, 7]


epoch:0,batch:708,lr:0.001,loss:2.4617,mean_loss:2.294,mean_f1:0.092:  71%|██████▍  | 709/1000 [03:02<01:15,  3.87it/s]

preds:[1, 1, 1, 1],label:[6, 6, 1, 7]


epoch:0,batch:709,lr:0.001,loss:2.6857,mean_loss:2.295,mean_f1:0.092:  71%|██████▍  | 710/1000 [03:02<01:14,  3.91it/s]

preds:[1, 1, 1, 1],label:[0, 4, 4, 10]


epoch:0,batch:710,lr:0.001,loss:2.3051,mean_loss:2.295,mean_f1:0.092:  71%|██████▍  | 711/1000 [03:02<01:13,  3.93it/s]

preds:[1, 1, 1, 1],label:[1, 0, 7, 4]


epoch:0,batch:711,lr:0.001,loss:2.2048,mean_loss:2.295,mean_f1:0.092:  71%|██████▍  | 712/1000 [03:03<01:13,  3.91it/s]

preds:[1, 1, 1, 1],label:[1, 4, 5, 2]


epoch:0,batch:712,lr:0.001,loss:1.9744,mean_loss:2.294,mean_f1:0.091:  71%|██████▍  | 713/1000 [03:03<01:12,  3.94it/s]

preds:[1, 1, 1, 1],label:[0, 0, 3, 0]


epoch:0,batch:713,lr:0.001,loss:2.0709,mean_loss:2.294,mean_f1:0.091:  71%|██████▍  | 714/1000 [03:03<01:12,  3.92it/s]

preds:[1, 1, 1, 1],label:[3, 1, 4, 2]


epoch:0,batch:714,lr:0.001,loss:3.0176,mean_loss:2.295,mean_f1:0.091:  72%|██████▍  | 715/1000 [03:03<01:12,  3.95it/s]

preds:[1, 1, 1, 1],label:[11, 6, 8, 0]


epoch:0,batch:715,lr:0.001,loss:2.5898,mean_loss:2.295,mean_f1:0.091:  72%|██████▍  | 716/1000 [03:04<01:13,  3.89it/s]

preds:[1, 1, 1, 1],label:[8, 4, 7, 1]


epoch:0,batch:716,lr:0.001,loss:3.6059,mean_loss:2.297,mean_f1:0.091:  72%|██████▍  | 717/1000 [03:04<01:12,  3.92it/s]

preds:[1, 1, 1, 1],label:[10, 10, 10, 8]


epoch:0,batch:717,lr:0.001,loss:1.9857,mean_loss:2.297,mean_f1:0.091:  72%|██████▍  | 718/1000 [03:04<01:11,  3.94it/s]

preds:[1, 1, 1, 1],label:[1, 0, 4, 2]


epoch:0,batch:718,lr:0.001,loss:2.3179,mean_loss:2.297,mean_f1:0.091:  72%|██████▍  | 719/1000 [03:04<01:11,  3.93it/s]

preds:[1, 1, 1, 1],label:[10, 2, 0, 1]


epoch:0,batch:719,lr:0.001,loss:2.5725,mean_loss:2.297,mean_f1:0.091:  72%|██████▍  | 720/1000 [03:05<01:10,  3.95it/s]

preds:[1, 1, 1, 1],label:[2, 6, 4, 8]


epoch:0,batch:720,lr:0.001,loss:1.9594,mean_loss:2.297,mean_f1:0.091:  72%|██████▍  | 721/1000 [03:05<01:10,  3.95it/s]

preds:[1, 1, 1, 1],label:[1, 0, 5, 1]


epoch:0,batch:721,lr:0.001,loss:2.6203,mean_loss:2.297,mean_f1:0.091:  72%|██████▍  | 722/1000 [03:05<01:10,  3.94it/s]

preds:[1, 1, 1, 1],label:[1, 8, 10, 2]


epoch:0,batch:722,lr:0.001,loss:2.5035,mean_loss:2.297,mean_f1:0.091:  72%|██████▌  | 723/1000 [03:05<01:10,  3.94it/s]

preds:[1, 1, 1, 1],label:[2, 9, 3, 0]


epoch:0,batch:723,lr:0.001,loss:1.8097,mean_loss:2.297,mean_f1:0.091:  72%|██████▌  | 724/1000 [03:06<01:10,  3.93it/s]

preds:[1, 1, 1, 1],label:[1, 0, 1, 2]


epoch:0,batch:724,lr:0.001,loss:2.2697,mean_loss:2.297,mean_f1:0.091:  72%|██████▌  | 725/1000 [03:06<01:10,  3.91it/s]

preds:[1, 1, 1, 1],label:[9, 2, 1, 1]


epoch:0,batch:725,lr:0.001,loss:2.7487,mean_loss:2.297,mean_f1:0.091:  73%|██████▌  | 726/1000 [03:06<01:09,  3.93it/s]

preds:[1, 1, 1, 1],label:[7, 0, 11, 0]


epoch:0,batch:726,lr:0.001,loss:2.6384,mean_loss:2.298,mean_f1:0.091:  73%|██████▌  | 727/1000 [03:06<01:10,  3.85it/s]

preds:[1, 1, 1, 1],label:[0, 9, 10, 0]


epoch:0,batch:727,lr:0.001,loss:2.2776,mean_loss:2.298,mean_f1:0.091:  73%|██████▌  | 728/1000 [03:07<01:13,  3.71it/s]

preds:[1, 1, 1, 1],label:[1, 3, 3, 6]


epoch:0,batch:728,lr:0.001,loss:2.4314,mean_loss:2.298,mean_f1:0.091:  73%|██████▌  | 729/1000 [03:07<01:11,  3.78it/s]

preds:[1, 1, 1, 1],label:[0, 1, 2, 11]


epoch:0,batch:729,lr:0.001,loss:2.3504,mean_loss:2.298,mean_f1:0.091:  73%|██████▌  | 730/1000 [03:07<01:10,  3.81it/s]

preds:[1, 1, 1, 1],label:[3, 7, 0, 2]


epoch:0,batch:730,lr:0.001,loss:2.2056,mean_loss:2.298,mean_f1:0.091:  73%|██████▌  | 731/1000 [03:07<01:09,  3.90it/s]

preds:[1, 1, 1, 1],label:[1, 0, 3, 7]


epoch:0,batch:731,lr:0.001,loss:2.0556,mean_loss:2.298,mean_f1:0.091:  73%|██████▌  | 732/1000 [03:08<01:08,  3.89it/s]

preds:[1, 1, 1, 1],label:[0, 0, 7, 0]


epoch:0,batch:732,lr:0.001,loss:2.0568,mean_loss:2.297,mean_f1:0.091:  73%|██████▌  | 733/1000 [03:08<01:07,  3.94it/s]

preds:[1, 1, 1, 1],label:[1, 4, 2, 2]


epoch:0,batch:733,lr:0.001,loss:2.2888,mean_loss:2.297,mean_f1:0.091:  73%|██████▌  | 734/1000 [03:08<01:07,  3.95it/s]

preds:[1, 1, 1, 1],label:[5, 1, 4, 2]


epoch:0,batch:734,lr:0.001,loss:2.7068,mean_loss:2.298,mean_f1:0.091:  74%|██████▌  | 735/1000 [03:08<01:06,  3.98it/s]

preds:[1, 1, 1, 1],label:[13, 1, 2, 2]


epoch:0,batch:735,lr:0.001,loss:1.9415,mean_loss:2.297,mean_f1:0.091:  74%|██████▌  | 736/1000 [03:09<01:05,  4.02it/s]

preds:[1, 1, 1, 1],label:[4, 2, 0, 1]


epoch:0,batch:736,lr:0.001,loss:1.748,mean_loss:2.296,mean_f1:0.091:  74%|███████▎  | 737/1000 [03:09<01:05,  4.02it/s]

preds:[1, 1, 1, 1],label:[1, 0, 1, 2]


epoch:0,batch:737,lr:0.001,loss:2.2359,mean_loss:2.296,mean_f1:0.091:  74%|██████▋  | 738/1000 [03:09<01:05,  4.01it/s]

preds:[0, 0, 0, 0],label:[4, 3, 0, 3]


epoch:0,batch:738,lr:0.001,loss:2.6303,mean_loss:2.297,mean_f1:0.091:  74%|██████▋  | 739/1000 [03:09<01:04,  4.05it/s]

preds:[0, 0, 0, 0],label:[2, 9, 6, 2]


epoch:0,batch:739,lr:0.001,loss:2.6518,mean_loss:2.297,mean_f1:0.091:  74%|██████▋  | 740/1000 [03:10<01:05,  3.96it/s]

preds:[0, 0, 0, 0],label:[2, 6, 1, 11]


epoch:0,batch:740,lr:0.001,loss:2.5762,mean_loss:2.298,mean_f1:0.091:  74%|██████▋  | 741/1000 [03:10<01:05,  3.97it/s]

preds:[0, 0, 0, 0],label:[9, 1, 5, 2]


epoch:0,batch:741,lr:0.001,loss:2.1175,mean_loss:2.297,mean_f1:0.091:  74%|██████▋  | 742/1000 [03:10<01:04,  3.99it/s]

preds:[0, 0, 0, 0],label:[3, 4, 2, 1]


epoch:0,batch:742,lr:0.001,loss:1.8487,mean_loss:2.297,mean_f1:0.091:  74%|██████▋  | 743/1000 [03:10<01:04,  3.96it/s]

preds:[1, 1, 1, 1],label:[1, 1, 0, 3]


epoch:0,batch:743,lr:0.001,loss:1.7138,mean_loss:2.296,mean_f1:0.091:  74%|██████▋  | 744/1000 [03:11<01:04,  3.94it/s]

preds:[1, 1, 1, 1],label:[0, 0, 2, 1]


epoch:0,batch:744,lr:0.001,loss:1.8556,mean_loss:2.295,mean_f1:0.091:  74%|██████▋  | 745/1000 [03:11<01:05,  3.92it/s]

preds:[1, 1, 1, 1],label:[4, 0, 1, 0]


epoch:0,batch:745,lr:0.001,loss:1.8855,mean_loss:2.295,mean_f1:0.091:  75%|██████▋  | 746/1000 [03:11<01:03,  3.98it/s]

preds:[1, 1, 1, 1],label:[1, 1, 3, 2]


epoch:0,batch:746,lr:0.001,loss:3.0031,mean_loss:2.296,mean_f1:0.091:  75%|██████▋  | 747/1000 [03:11<01:03,  4.00it/s]

preds:[1, 1, 1, 1],label:[0, 8, 11, 7]


epoch:0,batch:747,lr:0.001,loss:2.602,mean_loss:2.296,mean_f1:0.091:  75%|███████▍  | 748/1000 [03:12<01:03,  3.99it/s]

preds:[1, 1, 1, 1],label:[12, 2, 3, 0]


epoch:0,batch:748,lr:0.001,loss:1.8779,mean_loss:2.296,mean_f1:0.091:  75%|██████▋  | 749/1000 [03:12<01:02,  4.01it/s]

preds:[1, 1, 1, 1],label:[4, 1, 1, 2]


epoch:0,batch:749,lr:0.001,loss:2.2441,mean_loss:2.296,mean_f1:0.091:  75%|██████▊  | 750/1000 [03:12<01:02,  4.02it/s]

preds:[1, 1, 1, 1],label:[3, 0, 2, 7]


epoch:0,batch:750,lr:0.001,loss:2.187,mean_loss:2.296,mean_f1:0.091:  75%|███████▌  | 751/1000 [03:12<01:02,  3.98it/s]

preds:[1, 1, 1, 1],label:[3, 7, 1, 0]


epoch:0,batch:751,lr:0.001,loss:2.4511,mean_loss:2.296,mean_f1:0.091:  75%|██████▊  | 752/1000 [03:13<01:01,  4.05it/s]

preds:[1, 1, 1, 1],label:[1, 8, 7, 2]


epoch:0,batch:752,lr:0.001,loss:2.1334,mean_loss:2.296,mean_f1:0.091:  75%|██████▊  | 753/1000 [03:13<01:06,  3.71it/s]

preds:[1, 1, 1, 1],label:[0, 0, 9, 0]


epoch:0,batch:753,lr:0.001,loss:3.4318,mean_loss:2.297,mean_f1:0.091:  75%|██████▊  | 754/1000 [03:13<01:04,  3.83it/s]

preds:[1, 1, 1, 1],label:[9, 7, 5, 11]


epoch:0,batch:754,lr:0.001,loss:2.2403,mean_loss:2.297,mean_f1:0.091:  76%|██████▊  | 755/1000 [03:13<01:02,  3.92it/s]

preds:[1, 1, 1, 1],label:[2, 4, 0, 7]


epoch:0,batch:755,lr:0.001,loss:2.026,mean_loss:2.297,mean_f1:0.091:  76%|███████▌  | 756/1000 [03:14<01:01,  3.95it/s]

preds:[1, 1, 1, 1],label:[2, 1, 6, 1]


epoch:0,batch:756,lr:0.001,loss:2.1386,mean_loss:2.296,mean_f1:0.091:  76%|██████▊  | 757/1000 [03:14<01:01,  3.92it/s]

preds:[1, 1, 1, 1],label:[2, 1, 4, 4]


epoch:0,batch:757,lr:0.001,loss:2.7231,mean_loss:2.297,mean_f1:0.091:  76%|██████▊  | 758/1000 [03:14<01:00,  3.97it/s]

preds:[1, 1, 1, 1],label:[7, 7, 8, 2]


epoch:0,batch:758,lr:0.001,loss:2.298,mean_loss:2.297,mean_f1:0.091:  76%|███████▌  | 759/1000 [03:14<01:00,  4.00it/s]

preds:[1, 1, 1, 1],label:[5, 4, 1, 0]


epoch:0,batch:759,lr:0.001,loss:2.9998,mean_loss:2.298,mean_f1:0.091:  76%|██████▊  | 760/1000 [03:15<01:00,  4.00it/s]

preds:[1, 1, 1, 1],label:[12, 1, 4, 5]


epoch:0,batch:760,lr:0.001,loss:2.0736,mean_loss:2.298,mean_f1:0.091:  76%|██████▊  | 761/1000 [03:15<00:59,  4.01it/s]

preds:[1, 1, 1, 1],label:[1, 0, 0, 5]


epoch:0,batch:761,lr:0.001,loss:2.5135,mean_loss:2.298,mean_f1:0.091:  76%|██████▊  | 762/1000 [03:15<01:00,  3.95it/s]

preds:[1, 1, 1, 1],label:[5, 2, 1, 8]


epoch:0,batch:762,lr:0.001,loss:1.863,mean_loss:2.297,mean_f1:0.091:  76%|███████▋  | 763/1000 [03:15<00:59,  3.95it/s]

preds:[1, 1, 1, 1],label:[1, 0, 0, 3]


epoch:0,batch:763,lr:0.001,loss:2.9699,mean_loss:2.298,mean_f1:0.091:  76%|██████▉  | 764/1000 [03:16<00:59,  3.95it/s]

preds:[1, 1, 1, 1],label:[9, 7, 3, 5]


epoch:0,batch:764,lr:0.001,loss:2.5834,mean_loss:2.299,mean_f1:0.091:  76%|██████▉  | 765/1000 [03:16<00:59,  3.94it/s]

preds:[1, 1, 1, 1],label:[2, 9, 9, 0]


epoch:0,batch:765,lr:0.001,loss:2.4936,mean_loss:2.299,mean_f1:0.091:  77%|██████▉  | 766/1000 [03:16<00:59,  3.93it/s]

preds:[1, 1, 1, 1],label:[5, 7, 1, 4]


epoch:0,batch:766,lr:0.001,loss:2.5001,mean_loss:2.299,mean_f1:0.091:  77%|██████▉  | 767/1000 [03:16<00:58,  4.01it/s]

preds:[1, 1, 1, 1],label:[4, 8, 3, 0]


epoch:0,batch:767,lr:0.001,loss:2.5275,mean_loss:2.299,mean_f1:0.091:  77%|██████▉  | 768/1000 [03:17<00:57,  4.00it/s]

preds:[1, 1, 1, 1],label:[9, 2, 5, 2]


epoch:0,batch:768,lr:0.001,loss:2.7635,mean_loss:2.3,mean_f1:0.091:  77%|████████▍  | 769/1000 [03:17<00:57,  4.01it/s]

preds:[1, 1, 1, 1],label:[2, 1, 3, 13]


epoch:0,batch:769,lr:0.001,loss:2.0793,mean_loss:2.3,mean_f1:0.091:  77%|████████▍  | 770/1000 [03:17<00:57,  4.01it/s]

preds:[1, 1, 1, 1],label:[1, 0, 8, 0]


epoch:0,batch:770,lr:0.001,loss:2.134,mean_loss:2.299,mean_f1:0.091:  77%|███████▋  | 771/1000 [03:17<00:56,  4.07it/s]

preds:[1, 1, 1, 1],label:[2, 1, 0, 6]


epoch:0,batch:771,lr:0.001,loss:3.2151,mean_loss:2.301,mean_f1:0.091:  77%|██████▉  | 772/1000 [03:18<00:56,  4.00it/s]

preds:[1, 1, 1, 1],label:[6, 5, 9, 6]


epoch:0,batch:772,lr:0.001,loss:2.4792,mean_loss:2.301,mean_f1:0.091:  77%|██████▉  | 773/1000 [03:18<00:56,  3.99it/s]

preds:[1, 1, 1, 1],label:[2, 5, 7, 3]


epoch:0,batch:773,lr:0.001,loss:2.117,mean_loss:2.301,mean_f1:0.091:  77%|███████▋  | 774/1000 [03:18<00:56,  4.01it/s]

preds:[1, 1, 1, 1],label:[4, 1, 7, 0]


epoch:0,batch:774,lr:0.001,loss:2.3798,mean_loss:2.301,mean_f1:0.091:  78%|██████▉  | 775/1000 [03:18<00:55,  4.06it/s]

preds:[1, 1, 1, 1],label:[5, 0, 1, 8]


epoch:0,batch:775,lr:0.001,loss:1.9106,mean_loss:2.3,mean_f1:0.091:  78%|████████▌  | 776/1000 [03:19<00:55,  4.07it/s]

preds:[0, 0, 0, 0],label:[3, 1, 1, 0]


epoch:0,batch:776,lr:0.001,loss:2.1125,mean_loss:2.3,mean_f1:0.091:  78%|████████▌  | 777/1000 [03:19<00:54,  4.08it/s]

preds:[1, 1, 1, 1],label:[0, 1, 3, 4]


epoch:0,batch:777,lr:0.001,loss:2.8697,mean_loss:2.301,mean_f1:0.091:  78%|███████  | 778/1000 [03:19<00:57,  3.86it/s]

preds:[1, 1, 1, 1],label:[4, 9, 9, 5]


epoch:0,batch:778,lr:0.001,loss:2.9055,mean_loss:2.302,mean_f1:0.091:  78%|███████  | 779/1000 [03:19<00:57,  3.87it/s]

preds:[1, 1, 1, 1],label:[13, 1, 3, 7]


epoch:0,batch:779,lr:0.001,loss:2.2388,mean_loss:2.301,mean_f1:0.091:  78%|███████  | 780/1000 [03:20<00:57,  3.84it/s]

preds:[1, 1, 1, 1],label:[5, 7, 2, 1]


epoch:0,batch:780,lr:0.001,loss:2.6844,mean_loss:2.302,mean_f1:0.09:  78%|███████▊  | 781/1000 [03:20<00:56,  3.87it/s]

preds:[1, 1, 1, 1],label:[8, 10, 2, 0]


epoch:0,batch:781,lr:0.001,loss:2.1093,mean_loss:2.302,mean_f1:0.091:  78%|███████  | 782/1000 [03:20<00:56,  3.89it/s]

preds:[1, 1, 1, 1],label:[1, 7, 1, 4]


epoch:0,batch:782,lr:0.001,loss:2.2331,mean_loss:2.302,mean_f1:0.09:  78%|███████▊  | 783/1000 [03:21<00:56,  3.84it/s]

preds:[1, 1, 1, 1],label:[5, 2, 3, 0]


epoch:0,batch:783,lr:0.001,loss:2.4401,mean_loss:2.302,mean_f1:0.09:  78%|███████▊  | 784/1000 [03:21<00:55,  3.88it/s]

preds:[1, 1, 1, 1],label:[3, 9, 1, 5]


epoch:0,batch:784,lr:0.001,loss:2.5077,mean_loss:2.302,mean_f1:0.09:  78%|███████▊  | 785/1000 [03:21<00:54,  3.92it/s]

preds:[1, 1, 1, 1],label:[1, 0, 11, 4]


epoch:0,batch:785,lr:0.001,loss:2.4502,mean_loss:2.302,mean_f1:0.091:  79%|███████  | 786/1000 [03:21<00:54,  3.89it/s]

preds:[1, 1, 1, 1],label:[8, 3, 4, 1]


epoch:0,batch:786,lr:0.001,loss:2.1559,mean_loss:2.302,mean_f1:0.09:  79%|███████▊  | 787/1000 [03:22<00:55,  3.84it/s]

preds:[1, 1, 1, 1],label:[4, 0, 0, 7]


epoch:0,batch:787,lr:0.001,loss:2.5557,mean_loss:2.302,mean_f1:0.09:  79%|███████▉  | 788/1000 [03:22<00:55,  3.84it/s]

preds:[1, 1, 1, 1],label:[3, 0, 2, 10]


epoch:0,batch:788,lr:0.001,loss:2.2241,mean_loss:2.302,mean_f1:0.09:  79%|███████▉  | 789/1000 [03:22<00:54,  3.86it/s]

preds:[1, 1, 1, 1],label:[5, 1, 2, 5]


epoch:0,batch:789,lr:0.001,loss:1.9805,mean_loss:2.302,mean_f1:0.09:  79%|███████▉  | 790/1000 [03:22<00:54,  3.83it/s]

preds:[1, 1, 1, 1],label:[2, 3, 1, 1]


epoch:0,batch:790,lr:0.001,loss:1.9757,mean_loss:2.301,mean_f1:0.091:  79%|███████  | 791/1000 [03:23<00:54,  3.81it/s]

preds:[1, 1, 1, 1],label:[1, 1, 2, 3]


epoch:0,batch:791,lr:0.001,loss:2.5229,mean_loss:2.302,mean_f1:0.091:  79%|███████▏ | 792/1000 [03:23<00:54,  3.85it/s]

preds:[1, 1, 1, 1],label:[4, 3, 2, 8]


epoch:0,batch:792,lr:0.001,loss:2.2409,mean_loss:2.302,mean_f1:0.09:  79%|███████▉  | 793/1000 [03:23<00:54,  3.83it/s]

preds:[1, 1, 1, 1],label:[2, 0, 7, 4]


epoch:0,batch:793,lr:0.001,loss:2.1826,mean_loss:2.301,mean_f1:0.09:  79%|███████▉  | 794/1000 [03:23<00:54,  3.79it/s]

preds:[1, 1, 1, 1],label:[8, 2, 1, 0]


epoch:0,batch:794,lr:0.001,loss:2.2599,mean_loss:2.301,mean_f1:0.09:  80%|███████▉  | 795/1000 [03:24<00:55,  3.68it/s]

preds:[1, 1, 1, 1],label:[1, 6, 2, 0]


epoch:0,batch:795,lr:0.001,loss:2.9548,mean_loss:2.302,mean_f1:0.09:  80%|███████▉  | 796/1000 [03:24<00:54,  3.72it/s]

preds:[1, 1, 1, 1],label:[10, 2, 6, 5]


epoch:0,batch:796,lr:0.001,loss:2.691,mean_loss:2.303,mean_f1:0.09:  80%|████████▊  | 797/1000 [03:24<00:54,  3.75it/s]

preds:[1, 1, 1, 1],label:[10, 7, 0, 4]


epoch:0,batch:797,lr:0.001,loss:2.456,mean_loss:2.303,mean_f1:0.09:  80%|████████▊  | 798/1000 [03:24<00:53,  3.76it/s]

preds:[1, 1, 1, 1],label:[6, 3, 2, 2]


epoch:0,batch:798,lr:0.001,loss:1.9183,mean_loss:2.302,mean_f1:0.09:  80%|███████▉  | 799/1000 [03:25<00:52,  3.81it/s]

preds:[1, 1, 1, 1],label:[1, 0, 1, 4]


epoch:0,batch:799,lr:0.001,loss:2.0254,mean_loss:2.302,mean_f1:0.09:  80%|████████  | 800/1000 [03:25<00:52,  3.81it/s]

preds:[1, 1, 1, 1],label:[0, 1, 0, 7]


epoch:0,batch:800,lr:0.001,loss:2.4274,mean_loss:2.302,mean_f1:0.09:  80%|████████  | 801/1000 [03:25<00:51,  3.87it/s]

preds:[1, 1, 1, 1],label:[3, 4, 3, 7]


epoch:0,batch:801,lr:0.001,loss:2.0229,mean_loss:2.302,mean_f1:0.09:  80%|████████  | 802/1000 [03:26<00:55,  3.59it/s]

preds:[1, 1, 1, 1],label:[2, 1, 0, 5]


epoch:0,batch:802,lr:0.001,loss:2.3064,mean_loss:2.302,mean_f1:0.09:  80%|████████  | 803/1000 [03:26<00:53,  3.66it/s]

preds:[1, 1, 1, 1],label:[3, 6, 0, 1]


epoch:0,batch:803,lr:0.001,loss:2.1819,mean_loss:2.302,mean_f1:0.09:  80%|████████  | 804/1000 [03:26<00:53,  3.70it/s]

preds:[1, 1, 1, 1],label:[0, 3, 2, 5]


epoch:0,batch:804,lr:0.001,loss:2.7002,mean_loss:2.302,mean_f1:0.09:  80%|████████  | 805/1000 [03:26<00:52,  3.71it/s]

preds:[1, 1, 1, 1],label:[1, 2, 4, 13]


epoch:0,batch:805,lr:0.001,loss:2.6504,mean_loss:2.303,mean_f1:0.09:  81%|████████  | 806/1000 [03:27<00:52,  3.73it/s]

preds:[1, 1, 1, 1],label:[12, 3, 2, 1]


epoch:0,batch:806,lr:0.001,loss:2.7781,mean_loss:2.303,mean_f1:0.09:  81%|████████  | 807/1000 [03:27<00:51,  3.73it/s]

preds:[1, 1, 1, 1],label:[4, 1, 9, 10]


epoch:0,batch:807,lr:0.001,loss:2.0872,mean_loss:2.303,mean_f1:0.09:  81%|████████  | 808/1000 [03:27<00:50,  3.77it/s]

preds:[1, 1, 1, 1],label:[5, 2, 2, 0]


epoch:0,batch:808,lr:0.001,loss:2.7055,mean_loss:2.304,mean_f1:0.09:  81%|████████  | 809/1000 [03:27<00:51,  3.74it/s]

preds:[1, 1, 1, 1],label:[12, 0, 4, 0]


epoch:0,batch:809,lr:0.001,loss:2.7605,mean_loss:2.304,mean_f1:0.09:  81%|████████  | 810/1000 [03:28<00:49,  3.80it/s]

preds:[1, 1, 1, 1],label:[8, 6, 7, 2]


epoch:0,batch:810,lr:0.001,loss:1.93,mean_loss:2.304,mean_f1:0.09:  81%|█████████▋  | 811/1000 [03:28<00:49,  3.85it/s]

preds:[1, 1, 1, 1],label:[1, 0, 0, 3]


epoch:0,batch:811,lr:0.001,loss:2.071,mean_loss:2.303,mean_f1:0.09:  81%|████████▉  | 812/1000 [03:28<00:50,  3.71it/s]

preds:[1, 1, 1, 1],label:[0, 0, 7, 0]


epoch:0,batch:812,lr:0.001,loss:1.7824,mean_loss:2.303,mean_f1:0.09:  81%|████████▏ | 813/1000 [03:28<00:50,  3.71it/s]

preds:[1, 1, 1, 1],label:[0, 0, 1, 1]


epoch:0,batch:813,lr:0.001,loss:1.7815,mean_loss:2.302,mean_f1:0.09:  81%|████████▏ | 814/1000 [03:29<00:50,  3.72it/s]

preds:[1, 1, 1, 1],label:[1, 0, 0, 0]


epoch:0,batch:814,lr:0.001,loss:2.6423,mean_loss:2.302,mean_f1:0.09:  82%|████████▏ | 815/1000 [03:29<00:48,  3.79it/s]

preds:[1, 1, 1, 1],label:[12, 0, 4, 2]


epoch:0,batch:815,lr:0.001,loss:2.7534,mean_loss:2.303,mean_f1:0.09:  82%|████████▏ | 816/1000 [03:29<00:48,  3.83it/s]

preds:[0, 0, 0, 0],label:[9, 5, 1, 8]


epoch:0,batch:816,lr:0.001,loss:2.5925,mean_loss:2.303,mean_f1:0.09:  82%|████████▏ | 817/1000 [03:30<00:48,  3.80it/s]

preds:[0, 0, 0, 0],label:[1, 5, 0, 11]


epoch:0,batch:817,lr:0.001,loss:2.237,mean_loss:2.303,mean_f1:0.09:  82%|████████▉  | 818/1000 [03:30<00:47,  3.83it/s]

preds:[0, 0, 0, 0],label:[3, 4, 1, 4]


epoch:0,batch:818,lr:0.001,loss:1.9769,mean_loss:2.303,mean_f1:0.09:  82%|████████▏ | 819/1000 [03:30<00:47,  3.84it/s]

preds:[0, 0, 0, 0],label:[4, 2, 1, 1]


epoch:0,batch:819,lr:0.001,loss:2.0975,mean_loss:2.303,mean_f1:0.09:  82%|████████▏ | 820/1000 [03:30<00:47,  3.82it/s]

preds:[0, 0, 0, 0],label:[0, 6, 1, 1]


epoch:0,batch:820,lr:0.001,loss:1.922,mean_loss:2.302,mean_f1:0.09:  82%|█████████  | 821/1000 [03:31<00:48,  3.69it/s]

preds:[0, 0, 0, 0],label:[2, 3, 0, 1]


epoch:0,batch:821,lr:0.001,loss:2.362,mean_loss:2.302,mean_f1:0.09:  82%|█████████  | 822/1000 [03:31<00:49,  3.60it/s]

preds:[0, 0, 0, 0],label:[5, 4, 3, 2]


epoch:0,batch:822,lr:0.001,loss:2.347,mean_loss:2.302,mean_f1:0.09:  82%|█████████  | 823/1000 [03:31<00:50,  3.51it/s]

preds:[0, 0, 0, 0],label:[5, 1, 1, 6]


epoch:0,batch:823,lr:0.001,loss:2.6852,mean_loss:2.303,mean_f1:0.09:  82%|████████▏ | 824/1000 [03:31<00:50,  3.47it/s]

preds:[0, 0, 0, 0],label:[8, 7, 7, 0]


epoch:0,batch:824,lr:0.001,loss:1.7848,mean_loss:2.302,mean_f1:0.09:  82%|████████▎ | 825/1000 [03:32<00:52,  3.34it/s]

preds:[0, 0, 0, 0],label:[0, 0, 1, 4]


epoch:0,batch:825,lr:0.001,loss:2.2481,mean_loss:2.302,mean_f1:0.09:  83%|████████▎ | 826/1000 [03:32<00:50,  3.43it/s]

preds:[0, 0, 0, 0],label:[8, 1, 1, 4]


epoch:0,batch:826,lr:0.001,loss:1.5942,mean_loss:2.301,mean_f1:0.09:  83%|████████▎ | 827/1000 [03:32<00:49,  3.46it/s]

preds:[0, 0, 0, 0],label:[0, 1, 0, 1]


epoch:0,batch:827,lr:0.001,loss:2.5243,mean_loss:2.301,mean_f1:0.09:  83%|████████▎ | 828/1000 [03:33<00:49,  3.47it/s]

preds:[0, 0, 0, 0],label:[1, 9, 6, 1]


epoch:0,batch:828,lr:0.001,loss:2.2722,mean_loss:2.301,mean_f1:0.09:  83%|████████▎ | 829/1000 [03:33<00:47,  3.61it/s]

preds:[0, 0, 0, 0],label:[9, 0, 3, 0]


epoch:0,batch:829,lr:0.001,loss:2.7716,mean_loss:2.302,mean_f1:0.09:  83%|████████▎ | 830/1000 [03:33<00:46,  3.63it/s]

preds:[0, 0, 0, 0],label:[11, 1, 3, 5]


epoch:0,batch:830,lr:0.001,loss:2.4482,mean_loss:2.302,mean_f1:0.09:  83%|████████▎ | 831/1000 [03:33<00:45,  3.73it/s]

preds:[0, 0, 0, 0],label:[4, 11, 1, 1]


epoch:0,batch:831,lr:0.001,loss:1.9317,mean_loss:2.302,mean_f1:0.09:  83%|████████▎ | 832/1000 [03:34<00:44,  3.80it/s]

preds:[0, 0, 0, 0],label:[0, 3, 2, 0]


epoch:0,batch:832,lr:0.001,loss:2.8128,mean_loss:2.302,mean_f1:0.09:  83%|████████▎ | 833/1000 [03:34<00:43,  3.87it/s]

preds:[1, 1, 1, 1],label:[3, 8, 2, 8]


epoch:0,batch:833,lr:0.001,loss:1.7303,mean_loss:2.302,mean_f1:0.09:  83%|████████▎ | 834/1000 [03:34<00:43,  3.86it/s]

preds:[1, 1, 1, 1],label:[0, 4, 0, 1]


epoch:0,batch:834,lr:0.001,loss:2.6187,mean_loss:2.302,mean_f1:0.09:  84%|████████▎ | 835/1000 [03:34<00:42,  3.90it/s]

preds:[1, 1, 1, 1],label:[6, 11, 1, 1]


epoch:0,batch:835,lr:0.001,loss:2.5968,mean_loss:2.302,mean_f1:0.09:  84%|████████▎ | 836/1000 [03:35<00:42,  3.89it/s]

preds:[1, 1, 1, 1],label:[6, 0, 4, 8]


epoch:0,batch:836,lr:0.001,loss:2.4911,mean_loss:2.303,mean_f1:0.09:  84%|████████▎ | 837/1000 [03:35<00:42,  3.87it/s]

preds:[1, 1, 1, 1],label:[3, 5, 1, 6]


epoch:0,batch:837,lr:0.001,loss:2.2129,mean_loss:2.303,mean_f1:0.09:  84%|████████▍ | 838/1000 [03:35<00:41,  3.90it/s]

preds:[1, 1, 1, 1],label:[5, 8, 1, 1]


epoch:0,batch:838,lr:0.001,loss:2.7864,mean_loss:2.303,mean_f1:0.09:  84%|████████▍ | 839/1000 [03:35<00:41,  3.90it/s]

preds:[1, 1, 1, 1],label:[0, 10, 4, 7]


epoch:0,batch:839,lr:0.001,loss:2.1929,mean_loss:2.303,mean_f1:0.09:  84%|████████▍ | 840/1000 [03:36<00:41,  3.88it/s]

preds:[1, 1, 1, 1],label:[0, 0, 3, 6]


epoch:0,batch:840,lr:0.001,loss:2.1685,mean_loss:2.303,mean_f1:0.09:  84%|████████▍ | 841/1000 [03:36<00:41,  3.84it/s]

preds:[1, 1, 1, 1],label:[2, 3, 0, 2]


epoch:0,batch:841,lr:0.001,loss:2.3405,mean_loss:2.303,mean_f1:0.09:  84%|████████▍ | 842/1000 [03:36<00:41,  3.83it/s]

preds:[1, 1, 1, 1],label:[7, 1, 4, 2]


epoch:0,batch:842,lr:0.001,loss:2.5676,mean_loss:2.303,mean_f1:0.09:  84%|████████▍ | 843/1000 [03:37<00:41,  3.80it/s]

preds:[1, 1, 1, 1],label:[3, 2, 11, 1]


epoch:0,batch:843,lr:0.001,loss:2.9971,mean_loss:2.304,mean_f1:0.09:  84%|████████▍ | 844/1000 [03:37<00:40,  3.83it/s]

preds:[1, 1, 1, 1],label:[4, 2, 13, 4]


epoch:0,batch:844,lr:0.001,loss:1.9732,mean_loss:2.304,mean_f1:0.09:  84%|████████▍ | 845/1000 [03:37<00:40,  3.86it/s]

preds:[1, 1, 1, 1],label:[2, 3, 0, 0]


epoch:0,batch:845,lr:0.001,loss:2.161,mean_loss:2.303,mean_f1:0.09:  85%|█████████▎ | 846/1000 [03:37<00:39,  3.86it/s]

preds:[1, 1, 1, 1],label:[4, 0, 3, 2]


epoch:0,batch:846,lr:0.001,loss:1.9673,mean_loss:2.303,mean_f1:0.09:  85%|████████▍ | 847/1000 [03:38<00:40,  3.82it/s]

preds:[1, 1, 1, 1],label:[1, 3, 3, 0]


epoch:0,batch:847,lr:0.001,loss:2.7357,mean_loss:2.304,mean_f1:0.09:  85%|████████▍ | 848/1000 [03:38<00:42,  3.57it/s]

preds:[1, 1, 1, 1],label:[2, 10, 3, 2]


epoch:0,batch:848,lr:0.001,loss:2.314,mean_loss:2.304,mean_f1:0.089:  85%|████████▍ | 849/1000 [03:38<00:41,  3.63it/s]

preds:[1, 1, 1, 1],label:[2, 3, 3, 3]


epoch:0,batch:849,lr:0.001,loss:1.7404,mean_loss:2.303,mean_f1:0.09:  85%|████████▌ | 850/1000 [03:38<00:40,  3.74it/s]

preds:[1, 1, 1, 1],label:[0, 1, 3, 1]


epoch:0,batch:850,lr:0.001,loss:2.0899,mean_loss:2.303,mean_f1:0.09:  85%|████████▌ | 851/1000 [03:39<00:39,  3.78it/s]

preds:[1, 1, 1, 1],label:[8, 1, 1, 2]


epoch:0,batch:851,lr:0.001,loss:2.1527,mean_loss:2.302,mean_f1:0.09:  85%|████████▌ | 852/1000 [03:39<00:38,  3.83it/s]

preds:[1, 1, 1, 1],label:[1, 3, 0, 7]


epoch:0,batch:852,lr:0.001,loss:2.2149,mean_loss:2.302,mean_f1:0.09:  85%|████████▌ | 853/1000 [03:39<00:38,  3.82it/s]

preds:[1, 1, 1, 1],label:[0, 5, 3, 2]


epoch:0,batch:853,lr:0.001,loss:2.361,mean_loss:2.302,mean_f1:0.09:  85%|█████████▍ | 854/1000 [03:39<00:38,  3.84it/s]

preds:[1, 1, 1, 1],label:[0, 6, 8, 0]


epoch:0,batch:854,lr:0.001,loss:2.5592,mean_loss:2.303,mean_f1:0.089:  86%|███████▋ | 855/1000 [03:40<00:37,  3.89it/s]

preds:[1, 1, 1, 1],label:[5, 2, 6, 2]


epoch:0,batch:855,lr:0.001,loss:1.8177,mean_loss:2.302,mean_f1:0.09:  86%|████████▌ | 856/1000 [03:40<00:36,  3.92it/s]

preds:[1, 1, 1, 1],label:[1, 0, 1, 4]


epoch:0,batch:856,lr:0.001,loss:2.19,mean_loss:2.302,mean_f1:0.089:  86%|█████████▍ | 857/1000 [03:40<00:36,  3.94it/s]

preds:[1, 1, 1, 1],label:[2, 0, 2, 5]


epoch:0,batch:857,lr:0.001,loss:1.9342,mean_loss:2.302,mean_f1:0.089:  86%|███████▋ | 858/1000 [03:40<00:36,  3.93it/s]

preds:[1, 1, 1, 1],label:[0, 3, 4, 1]


epoch:0,batch:858,lr:0.001,loss:1.6523,mean_loss:2.301,mean_f1:0.09:  86%|████████▌ | 859/1000 [03:41<00:35,  3.95it/s]

preds:[1, 1, 1, 1],label:[1, 0, 1, 0]


epoch:0,batch:859,lr:0.001,loss:1.7405,mean_loss:2.3,mean_f1:0.09:  86%|██████████▎ | 860/1000 [03:41<00:36,  3.86it/s]

preds:[1, 1, 1, 1],label:[0, 0, 3, 0]


epoch:0,batch:860,lr:0.001,loss:2.0835,mean_loss:2.3,mean_f1:0.09:  86%|██████████▎ | 861/1000 [03:41<00:35,  3.88it/s]

preds:[0, 0, 0, 0],label:[6, 1, 0, 2]


epoch:0,batch:861,lr:0.001,loss:1.7329,mean_loss:2.299,mean_f1:0.09:  86%|████████▌ | 862/1000 [03:41<00:35,  3.91it/s]

preds:[0, 0, 0, 0],label:[3, 1, 0, 1]


epoch:0,batch:862,lr:0.001,loss:2.505,mean_loss:2.3,mean_f1:0.09:  86%|███████████▏ | 863/1000 [03:42<00:34,  3.93it/s]

preds:[0, 0, 0, 0],label:[9, 1, 3, 4]


epoch:0,batch:863,lr:0.001,loss:2.5636,mean_loss:2.3,mean_f1:0.09:  86%|██████████▎ | 864/1000 [03:42<00:34,  3.90it/s]

preds:[0, 0, 0, 0],label:[0, 6, 11, 1]


epoch:0,batch:864,lr:0.001,loss:2.6007,mean_loss:2.3,mean_f1:0.09:  86%|██████████▍ | 865/1000 [03:42<00:34,  3.91it/s]

preds:[0, 0, 0, 0],label:[10, 0, 1, 6]


epoch:0,batch:865,lr:0.001,loss:2.2637,mean_loss:2.3,mean_f1:0.09:  87%|██████████▍ | 866/1000 [03:43<00:34,  3.87it/s]

preds:[0, 0, 0, 0],label:[0, 7, 4, 0]


epoch:0,batch:866,lr:0.001,loss:2.2479,mean_loss:2.3,mean_f1:0.09:  87%|██████████▍ | 867/1000 [03:43<00:34,  3.86it/s]

preds:[0, 0, 0, 0],label:[2, 0, 7, 2]


epoch:0,batch:867,lr:0.001,loss:2.8659,mean_loss:2.301,mean_f1:0.09:  87%|████████▋ | 868/1000 [03:43<00:33,  3.90it/s]

preds:[0, 0, 0, 0],label:[0, 8, 7, 7]


epoch:0,batch:868,lr:0.001,loss:1.9453,mean_loss:2.3,mean_f1:0.09:  87%|██████████▍ | 869/1000 [03:43<00:33,  3.88it/s]

preds:[0, 0, 0, 0],label:[2, 4, 0, 1]


epoch:0,batch:869,lr:0.001,loss:2.2852,mean_loss:2.3,mean_f1:0.09:  87%|██████████▍ | 870/1000 [03:44<00:32,  3.94it/s]

preds:[0, 0, 0, 0],label:[10, 2, 0, 0]


epoch:0,batch:870,lr:0.001,loss:2.1775,mean_loss:2.3,mean_f1:0.09:  87%|██████████▍ | 871/1000 [03:44<00:33,  3.88it/s]

preds:[0, 0, 0, 0],label:[0, 3, 3, 8]


epoch:0,batch:871,lr:0.001,loss:2.7042,mean_loss:2.301,mean_f1:0.09:  87%|████████▋ | 872/1000 [03:44<00:34,  3.71it/s]

preds:[0, 0, 0, 0],label:[8, 1, 9, 3]


epoch:0,batch:872,lr:0.001,loss:2.4557,mean_loss:2.301,mean_f1:0.09:  87%|████████▋ | 873/1000 [03:44<00:34,  3.68it/s]

preds:[0, 0, 0, 0],label:[3, 5, 0, 7]


epoch:0,batch:873,lr:0.001,loss:1.7992,mean_loss:2.3,mean_f1:0.09:  87%|██████████▍ | 874/1000 [03:45<00:33,  3.77it/s]

preds:[0, 0, 0, 0],label:[0, 0, 1, 4]


epoch:0,batch:874,lr:0.001,loss:2.1742,mean_loss:2.3,mean_f1:0.09:  88%|██████████▌ | 875/1000 [03:45<00:32,  3.83it/s]

preds:[0, 0, 0, 0],label:[4, 4, 2, 0]


epoch:0,batch:875,lr:0.001,loss:1.653,mean_loss:2.299,mean_f1:0.09:  88%|█████████▋ | 876/1000 [03:45<00:32,  3.84it/s]

preds:[0, 0, 0, 0],label:[0, 0, 1, 2]


epoch:0,batch:876,lr:0.001,loss:2.3707,mean_loss:2.299,mean_f1:0.09:  88%|████████▊ | 877/1000 [03:45<00:31,  3.90it/s]

preds:[0, 0, 0, 0],label:[1, 7, 7, 0]


epoch:0,batch:877,lr:0.001,loss:2.3229,mean_loss:2.299,mean_f1:0.09:  88%|████████▊ | 878/1000 [03:46<00:31,  3.87it/s]

preds:[0, 0, 0, 0],label:[3, 4, 4, 3]


epoch:0,batch:878,lr:0.001,loss:1.9514,mean_loss:2.299,mean_f1:0.09:  88%|████████▊ | 879/1000 [03:46<00:30,  3.91it/s]

preds:[0, 0, 0, 0],label:[4, 0, 2, 1]


epoch:0,batch:879,lr:0.001,loss:1.9469,mean_loss:2.299,mean_f1:0.09:  88%|████████▊ | 880/1000 [03:46<00:30,  3.93it/s]

preds:[0, 0, 0, 0],label:[0, 2, 4, 1]


epoch:0,batch:880,lr:0.001,loss:2.2237,mean_loss:2.299,mean_f1:0.09:  88%|████████▊ | 881/1000 [03:46<00:30,  3.93it/s]

preds:[0, 0, 0, 0],label:[8, 7, 0, 0]


epoch:0,batch:881,lr:0.001,loss:2.6422,mean_loss:2.299,mean_f1:0.09:  88%|████████▊ | 882/1000 [03:47<00:30,  3.93it/s]

preds:[0, 0, 0, 0],label:[2, 2, 9, 4]


epoch:0,batch:882,lr:0.001,loss:2.0614,mean_loss:2.299,mean_f1:0.09:  88%|████████▊ | 883/1000 [03:47<00:29,  3.92it/s]

preds:[0, 0, 0, 0],label:[3, 1, 4, 1]


epoch:0,batch:883,lr:0.001,loss:2.0605,mean_loss:2.298,mean_f1:0.09:  88%|████████▊ | 884/1000 [03:47<00:29,  3.93it/s]

preds:[0, 0, 0, 0],label:[7, 2, 1, 0]


epoch:0,batch:884,lr:0.001,loss:2.1789,mean_loss:2.298,mean_f1:0.09:  88%|████████▊ | 885/1000 [03:47<00:29,  3.96it/s]

preds:[0, 0, 0, 0],label:[6, 3, 0, 2]


epoch:0,batch:885,lr:0.001,loss:2.6514,mean_loss:2.299,mean_f1:0.09:  89%|████████▊ | 886/1000 [03:48<00:29,  3.89it/s]

preds:[0, 0, 0, 0],label:[11, 0, 5, 1]


epoch:0,batch:886,lr:0.001,loss:3.2466,mean_loss:2.3,mean_f1:0.09:  89%|██████████▋ | 887/1000 [03:48<00:29,  3.88it/s]

preds:[0, 0, 0, 0],label:[5, 5, 6, 6]


epoch:0,batch:887,lr:0.001,loss:2.5112,mean_loss:2.3,mean_f1:0.09:  89%|██████████▋ | 888/1000 [03:48<00:28,  3.90it/s]

preds:[0, 0, 0, 0],label:[9, 2, 2, 2]


epoch:0,batch:888,lr:0.001,loss:1.9405,mean_loss:2.3,mean_f1:0.09:  89%|██████████▋ | 889/1000 [03:48<00:28,  3.89it/s]

preds:[0, 0, 0, 0],label:[1, 0, 0, 6]


epoch:0,batch:889,lr:0.001,loss:1.6574,mean_loss:2.299,mean_f1:0.09:  89%|████████▉ | 890/1000 [03:49<00:28,  3.91it/s]

preds:[0, 0, 0, 0],label:[1, 0, 2, 0]


epoch:0,batch:890,lr:0.001,loss:2.1189,mean_loss:2.299,mean_f1:0.09:  89%|████████▉ | 891/1000 [03:49<00:27,  3.95it/s]

preds:[0, 0, 0, 0],label:[1, 0, 2, 5]


epoch:0,batch:891,lr:0.001,loss:1.7714,mean_loss:2.298,mean_f1:0.09:  89%|████████▉ | 892/1000 [03:49<00:27,  3.93it/s]

preds:[0, 0, 0, 0],label:[0, 1, 2, 1]


epoch:0,batch:892,lr:0.001,loss:3.0719,mean_loss:2.299,mean_f1:0.09:  89%|████████▉ | 893/1000 [03:49<00:27,  3.93it/s]

preds:[0, 0, 0, 0],label:[10, 2, 6, 6]


epoch:0,batch:893,lr:0.001,loss:2.1288,mean_loss:2.299,mean_f1:0.09:  89%|████████▉ | 894/1000 [03:50<00:26,  3.98it/s]

preds:[0, 0, 0, 0],label:[0, 1, 4, 7]


epoch:0,batch:894,lr:0.001,loss:2.0533,mean_loss:2.298,mean_f1:0.09:  90%|████████▉ | 895/1000 [03:50<00:26,  3.98it/s]

preds:[0, 0, 0, 0],label:[2, 6, 1, 0]


epoch:0,batch:895,lr:0.001,loss:1.7731,mean_loss:2.298,mean_f1:0.091:  90%|████████ | 896/1000 [03:50<00:26,  3.99it/s]

preds:[0, 0, 0, 0],label:[0, 0, 0, 7]


epoch:0,batch:896,lr:0.001,loss:2.2573,mean_loss:2.298,mean_f1:0.091:  90%|████████ | 897/1000 [03:51<00:27,  3.73it/s]

preds:[0, 0, 0, 0],label:[7, 3, 2, 2]


epoch:0,batch:897,lr:0.001,loss:2.0998,mean_loss:2.298,mean_f1:0.091:  90%|████████ | 898/1000 [03:51<00:26,  3.79it/s]

preds:[0, 0, 0, 0],label:[1, 3, 1, 4]


epoch:0,batch:898,lr:0.001,loss:2.7666,mean_loss:2.298,mean_f1:0.09:  90%|████████▉ | 899/1000 [03:51<00:26,  3.88it/s]

preds:[0, 0, 0, 0],label:[7, 8, 4, 4]


epoch:0,batch:899,lr:0.001,loss:1.9703,mean_loss:2.298,mean_f1:0.091:  90%|████████ | 900/1000 [03:51<00:26,  3.73it/s]

preds:[0, 0, 0, 0],label:[7, 0, 1, 1]


epoch:0,batch:900,lr:0.001,loss:2.8425,mean_loss:2.298,mean_f1:0.09:  90%|█████████ | 901/1000 [03:52<00:27,  3.58it/s]

preds:[0, 0, 0, 0],label:[9, 3, 2, 8]


epoch:0,batch:901,lr:0.001,loss:2.558,mean_loss:2.299,mean_f1:0.09:  90%|█████████▉ | 902/1000 [03:52<00:26,  3.67it/s]

preds:[0, 0, 0, 0],label:[11, 1, 4, 2]


epoch:0,batch:902,lr:0.001,loss:1.9587,mean_loss:2.298,mean_f1:0.09:  90%|█████████ | 903/1000 [03:52<00:26,  3.71it/s]

preds:[0, 0, 0, 0],label:[2, 1, 4, 1]


epoch:0,batch:903,lr:0.001,loss:2.2226,mean_loss:2.298,mean_f1:0.09:  90%|█████████ | 904/1000 [03:52<00:25,  3.77it/s]

preds:[0, 0, 0, 0],label:[2, 9, 0, 1]


epoch:0,batch:904,lr:0.001,loss:2.1077,mean_loss:2.298,mean_f1:0.09:  90%|█████████ | 905/1000 [03:53<00:24,  3.83it/s]

preds:[0, 0, 0, 0],label:[2, 4, 4, 2]


epoch:0,batch:905,lr:0.001,loss:2.0825,mean_loss:2.298,mean_f1:0.09:  91%|█████████ | 906/1000 [03:53<00:24,  3.85it/s]

preds:[0, 0, 0, 0],label:[2, 5, 0, 1]


epoch:0,batch:906,lr:0.001,loss:2.4082,mean_loss:2.298,mean_f1:0.09:  91%|█████████ | 907/1000 [03:53<00:23,  3.88it/s]

preds:[0, 0, 0, 0],label:[3, 0, 3, 5]


epoch:0,batch:907,lr:0.001,loss:2.0613,mean_loss:2.298,mean_f1:0.09:  91%|█████████ | 908/1000 [03:53<00:23,  3.90it/s]

preds:[0, 0, 0, 0],label:[3, 1, 2, 4]


epoch:0,batch:908,lr:0.001,loss:1.7157,mean_loss:2.297,mean_f1:0.09:  91%|█████████ | 909/1000 [03:54<00:23,  3.87it/s]

preds:[0, 0, 0, 0],label:[1, 2, 0, 2]


epoch:0,batch:909,lr:0.001,loss:2.1545,mean_loss:2.297,mean_f1:0.09:  91%|█████████ | 910/1000 [03:54<00:23,  3.82it/s]

preds:[0, 0, 0, 0],label:[2, 7, 3, 1]


epoch:0,batch:910,lr:0.001,loss:2.3301,mean_loss:2.297,mean_f1:0.09:  91%|█████████ | 911/1000 [03:54<00:23,  3.84it/s]

preds:[0, 0, 0, 0],label:[0, 5, 7, 1]


epoch:0,batch:911,lr:0.001,loss:2.4485,mean_loss:2.297,mean_f1:0.09:  91%|█████████ | 912/1000 [03:54<00:23,  3.82it/s]

preds:[0, 0, 0, 0],label:[4, 0, 10, 1]


epoch:0,batch:912,lr:0.001,loss:2.0191,mean_loss:2.297,mean_f1:0.09:  91%|█████████▏| 913/1000 [03:55<00:22,  3.90it/s]

preds:[1, 1, 1, 1],label:[3, 1, 2, 4]


epoch:0,batch:913,lr:0.001,loss:2.1564,mean_loss:2.297,mean_f1:0.09:  91%|█████████▏| 914/1000 [03:55<00:22,  3.89it/s]

preds:[1, 1, 1, 1],label:[6, 1, 1, 4]


epoch:0,batch:914,lr:0.001,loss:2.1228,mean_loss:2.296,mean_f1:0.09:  92%|█████████▏| 915/1000 [03:55<00:21,  3.92it/s]

preds:[1, 1, 1, 1],label:[0, 8, 2, 1]


epoch:0,batch:915,lr:0.001,loss:2.8145,mean_loss:2.297,mean_f1:0.09:  92%|█████████▏| 916/1000 [03:55<00:21,  3.89it/s]

preds:[1, 1, 1, 1],label:[1, 5, 8, 6]


epoch:0,batch:916,lr:0.001,loss:2.6399,mean_loss:2.297,mean_f1:0.09:  92%|█████████▏| 917/1000 [03:56<00:21,  3.89it/s]

preds:[1, 1, 1, 1],label:[1, 5, 11, 1]


epoch:0,batch:917,lr:0.001,loss:2.4133,mean_loss:2.297,mean_f1:0.09:  92%|█████████▏| 918/1000 [03:56<00:21,  3.88it/s]

preds:[1, 1, 1, 1],label:[2, 1, 11, 4]


epoch:0,batch:918,lr:0.001,loss:2.3948,mean_loss:2.298,mean_f1:0.09:  92%|█████████▏| 919/1000 [03:56<00:20,  3.88it/s]

preds:[1, 1, 1, 1],label:[0, 1, 5, 6]


epoch:0,batch:919,lr:0.001,loss:2.2407,mean_loss:2.298,mean_f1:0.09:  92%|█████████▏| 920/1000 [03:56<00:20,  3.87it/s]

preds:[1, 1, 1, 1],label:[7, 0, 3, 0]


epoch:0,batch:920,lr:0.001,loss:1.9438,mean_loss:2.297,mean_f1:0.09:  92%|█████████▏| 921/1000 [03:57<00:21,  3.74it/s]

preds:[1, 1, 1, 1],label:[2, 0, 3, 2]


epoch:0,batch:921,lr:0.001,loss:3.2929,mean_loss:2.298,mean_f1:0.09:  92%|█████████▏| 922/1000 [03:57<00:20,  3.73it/s]

preds:[1, 1, 1, 1],label:[9, 9, 9, 2]


epoch:0,batch:922,lr:0.001,loss:2.3745,mean_loss:2.298,mean_f1:0.09:  92%|█████████▏| 923/1000 [03:57<00:20,  3.76it/s]

preds:[1, 1, 1, 1],label:[11, 2, 1, 4]


epoch:0,batch:923,lr:0.001,loss:2.5122,mean_loss:2.299,mean_f1:0.09:  92%|█████████▏| 924/1000 [03:58<00:19,  3.81it/s]

preds:[1, 1, 1, 1],label:[0, 8, 5, 2]


epoch:0,batch:924,lr:0.001,loss:1.6161,mean_loss:2.298,mean_f1:0.09:  92%|█████████▎| 925/1000 [03:58<00:19,  3.87it/s]

preds:[1, 1, 1, 1],label:[1, 2, 2, 1]


epoch:0,batch:925,lr:0.001,loss:1.8486,mean_loss:2.297,mean_f1:0.09:  93%|█████████▎| 926/1000 [03:58<00:19,  3.85it/s]

preds:[1, 1, 1, 1],label:[1, 2, 3, 2]


epoch:0,batch:926,lr:0.001,loss:2.1841,mean_loss:2.297,mean_f1:0.09:  93%|█████████▎| 927/1000 [03:58<00:18,  3.89it/s]

preds:[1, 1, 1, 1],label:[1, 4, 3, 3]


epoch:0,batch:927,lr:0.001,loss:1.9143,mean_loss:2.297,mean_f1:0.09:  93%|█████████▎| 928/1000 [03:59<00:18,  3.86it/s]

preds:[1, 1, 1, 1],label:[0, 4, 0, 1]


epoch:0,batch:928,lr:0.001,loss:2.1139,mean_loss:2.297,mean_f1:0.09:  93%|█████████▎| 929/1000 [03:59<00:18,  3.86it/s]

preds:[1, 1, 1, 1],label:[1, 2, 0, 8]


epoch:0,batch:929,lr:0.001,loss:2.9696,mean_loss:2.297,mean_f1:0.09:  93%|█████████▎| 930/1000 [03:59<00:18,  3.87it/s]

preds:[1, 1, 1, 1],label:[2, 2, 8, 12]


epoch:0,batch:930,lr:0.001,loss:1.7702,mean_loss:2.297,mean_f1:0.09:  93%|█████████▎| 931/1000 [03:59<00:17,  3.88it/s]

preds:[1, 1, 1, 1],label:[2, 1, 2, 4]


epoch:0,batch:931,lr:0.001,loss:2.0158,mean_loss:2.296,mean_f1:0.09:  93%|█████████▎| 932/1000 [04:00<00:17,  3.88it/s]

preds:[1, 1, 1, 1],label:[1, 3, 2, 3]


epoch:0,batch:932,lr:0.001,loss:2.002,mean_loss:2.296,mean_f1:0.09:  93%|██████████▎| 933/1000 [04:00<00:17,  3.90it/s]

preds:[1, 1, 1, 1],label:[0, 3, 0, 2]


epoch:0,batch:933,lr:0.001,loss:1.9441,mean_loss:2.296,mean_f1:0.09:  93%|█████████▎| 934/1000 [04:00<00:17,  3.84it/s]

preds:[1, 1, 1, 1],label:[2, 2, 5, 2]


epoch:0,batch:934,lr:0.001,loss:2.7212,mean_loss:2.296,mean_f1:0.09:  94%|█████████▎| 935/1000 [04:00<00:17,  3.82it/s]

preds:[1, 1, 1, 1],label:[7, 5, 1, 8]


epoch:0,batch:935,lr:0.001,loss:2.1403,mean_loss:2.296,mean_f1:0.09:  94%|█████████▎| 936/1000 [04:01<00:16,  3.85it/s]

preds:[2, 2, 2, 2],label:[2, 4, 6, 1]


epoch:0,batch:936,lr:0.001,loss:2.7221,mean_loss:2.296,mean_f1:0.09:  94%|█████████▎| 937/1000 [04:01<00:16,  3.87it/s]

preds:[2, 2, 2, 2],label:[0, 5, 10, 1]


epoch:0,batch:937,lr:0.001,loss:2.2883,mean_loss:2.296,mean_f1:0.09:  94%|█████████▍| 938/1000 [04:01<00:16,  3.86it/s]

preds:[2, 2, 2, 2],label:[1, 0, 8, 4]


epoch:0,batch:938,lr:0.001,loss:2.2316,mean_loss:2.296,mean_f1:0.09:  94%|█████████▍| 939/1000 [04:01<00:15,  3.91it/s]

preds:[2, 2, 2, 2],label:[5, 1, 4, 0]


epoch:0,batch:939,lr:0.001,loss:2.272,mean_loss:2.296,mean_f1:0.09:  94%|██████████▎| 940/1000 [04:02<00:15,  3.92it/s]

preds:[2, 2, 2, 2],label:[1, 2, 4, 9]


epoch:0,batch:940,lr:0.001,loss:1.8561,mean_loss:2.296,mean_f1:0.09:  94%|█████████▍| 941/1000 [04:02<00:15,  3.93it/s]

preds:[2, 2, 2, 2],label:[3, 2, 2, 0]


epoch:0,batch:941,lr:0.001,loss:1.9976,mean_loss:2.296,mean_f1:0.09:  94%|█████████▍| 942/1000 [04:02<00:14,  3.92it/s]

preds:[2, 2, 2, 2],label:[0, 1, 5, 2]


epoch:0,batch:942,lr:0.001,loss:2.2721,mean_loss:2.296,mean_f1:0.09:  94%|█████████▍| 943/1000 [04:02<00:14,  3.96it/s]

preds:[2, 2, 2, 2],label:[1, 7, 0, 4]


epoch:0,batch:943,lr:0.001,loss:3.0081,mean_loss:2.296,mean_f1:0.09:  94%|█████████▍| 944/1000 [04:03<00:14,  3.96it/s]

preds:[2, 2, 2, 2],label:[10, 6, 3, 0]


epoch:0,batch:944,lr:0.001,loss:2.4025,mean_loss:2.296,mean_f1:0.09:  94%|█████████▍| 945/1000 [04:03<00:14,  3.93it/s]

preds:[2, 2, 2, 2],label:[0, 0, 8, 3]


epoch:0,batch:945,lr:0.001,loss:2.5018,mean_loss:2.297,mean_f1:0.09:  95%|█████████▍| 946/1000 [04:03<00:14,  3.61it/s]

preds:[2, 2, 2, 2],label:[3, 6, 4, 0]


epoch:0,batch:946,lr:0.001,loss:1.6657,mean_loss:2.296,mean_f1:0.09:  95%|█████████▍| 947/1000 [04:04<00:14,  3.73it/s]

preds:[2, 2, 2, 2],label:[1, 2, 1, 0]


epoch:0,batch:947,lr:0.001,loss:3.1666,mean_loss:2.297,mean_f1:0.09:  95%|█████████▍| 948/1000 [04:04<00:13,  3.77it/s]

preds:[2, 2, 2, 2],label:[9, 8, 0, 11]


epoch:0,batch:948,lr:0.001,loss:1.9813,mean_loss:2.297,mean_f1:0.09:  95%|█████████▍| 949/1000 [04:04<00:13,  3.81it/s]

preds:[2, 2, 2, 2],label:[4, 1, 3, 2]


epoch:0,batch:949,lr:0.001,loss:1.8115,mean_loss:2.296,mean_f1:0.09:  95%|█████████▌| 950/1000 [04:04<00:13,  3.82it/s]

preds:[2, 2, 2, 2],label:[0, 0, 0, 1]


epoch:0,batch:950,lr:0.001,loss:2.5907,mean_loss:2.296,mean_f1:0.09:  95%|█████████▌| 951/1000 [04:05<00:12,  3.83it/s]

preds:[2, 2, 2, 2],label:[5, 1, 1, 10]


epoch:0,batch:951,lr:0.001,loss:1.7727,mean_loss:2.296,mean_f1:0.09:  95%|█████████▌| 952/1000 [04:05<00:12,  3.84it/s]

preds:[2, 2, 2, 2],label:[2, 2, 2, 4]


epoch:0,batch:952,lr:0.001,loss:2.012,mean_loss:2.296,mean_f1:0.09:  95%|██████████▍| 953/1000 [04:05<00:12,  3.83it/s]

preds:[2, 2, 2, 2],label:[5, 2, 0, 0]


epoch:0,batch:953,lr:0.001,loss:2.7053,mean_loss:2.296,mean_f1:0.09:  95%|█████████▌| 954/1000 [04:05<00:11,  3.88it/s]

preds:[2, 2, 2, 2],label:[6, 5, 1, 8]


epoch:0,batch:954,lr:0.001,loss:2.2618,mean_loss:2.296,mean_f1:0.09:  96%|█████████▌| 955/1000 [04:06<00:11,  3.94it/s]

preds:[2, 2, 2, 2],label:[6, 3, 0, 2]


epoch:0,batch:955,lr:0.001,loss:3.2251,mean_loss:2.297,mean_f1:0.09:  96%|█████████▌| 956/1000 [04:06<00:11,  3.96it/s]

preds:[2, 2, 2, 2],label:[11, 0, 11, 7]


epoch:0,batch:956,lr:0.001,loss:2.4948,mean_loss:2.297,mean_f1:0.09:  96%|█████████▌| 957/1000 [04:06<00:10,  3.94it/s]

preds:[2, 2, 2, 2],label:[0, 10, 3, 1]


epoch:0,batch:957,lr:0.001,loss:2.8294,mean_loss:2.298,mean_f1:0.09:  96%|█████████▌| 958/1000 [04:06<00:10,  3.99it/s]

preds:[2, 2, 2, 2],label:[1, 8, 10, 3]


epoch:0,batch:958,lr:0.001,loss:1.9563,mean_loss:2.297,mean_f1:0.09:  96%|█████████▌| 959/1000 [04:07<00:10,  3.92it/s]

preds:[2, 2, 2, 2],label:[2, 5, 0, 1]


epoch:0,batch:959,lr:0.001,loss:2.2707,mean_loss:2.297,mean_f1:0.09:  96%|█████████▌| 960/1000 [04:07<00:10,  3.96it/s]

preds:[0, 0, 0, 0],label:[7, 4, 1, 0]


epoch:0,batch:960,lr:0.001,loss:3.1775,mean_loss:2.298,mean_f1:0.089:  96%|████████▋| 961/1000 [04:07<00:09,  3.97it/s]

preds:[0, 0, 0, 0],label:[11, 7, 4, 6]


epoch:0,batch:961,lr:0.001,loss:2.5087,mean_loss:2.298,mean_f1:0.089:  96%|████████▋| 962/1000 [04:07<00:09,  3.89it/s]

preds:[0, 0, 0, 0],label:[6, 5, 1, 4]


epoch:0,batch:962,lr:0.001,loss:2.0483,mean_loss:2.298,mean_f1:0.089:  96%|████████▋| 963/1000 [04:08<00:09,  3.88it/s]

preds:[0, 0, 0, 0],label:[0, 6, 1, 1]


epoch:0,batch:963,lr:0.001,loss:1.8572,mean_loss:2.298,mean_f1:0.09:  96%|█████████▋| 964/1000 [04:08<00:09,  3.96it/s]

preds:[0, 0, 0, 0],label:[0, 0, 4, 2]


epoch:0,batch:964,lr:0.001,loss:2.0378,mean_loss:2.297,mean_f1:0.09:  96%|█████████▋| 965/1000 [04:08<00:08,  3.96it/s]

preds:[0, 0, 0, 0],label:[0, 0, 7, 0]


epoch:0,batch:965,lr:0.001,loss:2.291,mean_loss:2.297,mean_f1:0.09:  97%|██████████▋| 966/1000 [04:08<00:08,  3.95it/s]

preds:[0, 0, 0, 0],label:[2, 4, 1, 7]


epoch:0,batch:966,lr:0.001,loss:2.5546,mean_loss:2.298,mean_f1:0.09:  97%|█████████▋| 967/1000 [04:09<00:08,  3.93it/s]

preds:[0, 0, 0, 0],label:[6, 6, 0, 3]


epoch:0,batch:967,lr:0.001,loss:3.3758,mean_loss:2.299,mean_f1:0.09:  97%|█████████▋| 968/1000 [04:09<00:08,  3.95it/s]

preds:[0, 0, 0, 0],label:[3, 9, 13, 1]


epoch:0,batch:968,lr:0.001,loss:1.981,mean_loss:2.298,mean_f1:0.09:  97%|██████████▋| 969/1000 [04:09<00:07,  3.90it/s]

preds:[0, 0, 0, 0],label:[1, 0, 0, 8]


epoch:0,batch:969,lr:0.001,loss:2.3601,mean_loss:2.299,mean_f1:0.09:  97%|█████████▋| 970/1000 [04:09<00:08,  3.64it/s]

preds:[0, 0, 0, 0],label:[4, 0, 5, 5]


epoch:0,batch:970,lr:0.001,loss:2.0404,mean_loss:2.298,mean_f1:0.09:  97%|█████████▋| 971/1000 [04:10<00:08,  3.58it/s]

preds:[0, 0, 0, 0],label:[6, 1, 0, 2]


epoch:0,batch:971,lr:0.001,loss:1.8612,mean_loss:2.298,mean_f1:0.09:  97%|█████████▋| 972/1000 [04:10<00:07,  3.73it/s]

preds:[0, 0, 0, 0],label:[1, 1, 3, 0]


epoch:0,batch:972,lr:0.001,loss:2.7447,mean_loss:2.298,mean_f1:0.09:  97%|█████████▋| 973/1000 [04:10<00:07,  3.83it/s]

preds:[0, 0, 0, 0],label:[1, 10, 1, 9]


epoch:0,batch:973,lr:0.001,loss:2.0451,mean_loss:2.298,mean_f1:0.09:  97%|█████████▋| 974/1000 [04:10<00:06,  3.85it/s]

preds:[0, 0, 0, 0],label:[3, 0, 3, 1]


epoch:0,batch:974,lr:0.001,loss:2.4507,mean_loss:2.298,mean_f1:0.09:  98%|█████████▊| 975/1000 [04:11<00:06,  3.86it/s]

preds:[0, 0, 0, 0],label:[9, 2, 1, 4]


epoch:0,batch:975,lr:0.001,loss:2.031,mean_loss:2.298,mean_f1:0.09:  98%|██████████▋| 976/1000 [04:11<00:06,  3.87it/s]

preds:[0, 0, 0, 0],label:[6, 2, 0, 1]


epoch:0,batch:976,lr:0.001,loss:2.3003,mean_loss:2.298,mean_f1:0.09:  98%|█████████▊| 977/1000 [04:11<00:05,  3.86it/s]

preds:[0, 0, 0, 0],label:[1, 2, 10, 1]


epoch:0,batch:977,lr:0.001,loss:1.811,mean_loss:2.297,mean_f1:0.09:  98%|██████████▊| 978/1000 [04:12<00:05,  3.86it/s]

preds:[0, 0, 0, 0],label:[1, 0, 1, 3]


epoch:0,batch:978,lr:0.001,loss:1.9838,mean_loss:2.297,mean_f1:0.09:  98%|█████████▊| 979/1000 [04:12<00:05,  3.81it/s]

preds:[0, 0, 0, 0],label:[3, 0, 0, 3]


epoch:0,batch:979,lr:0.001,loss:2.1066,mean_loss:2.297,mean_f1:0.09:  98%|█████████▊| 980/1000 [04:12<00:05,  3.85it/s]

preds:[0, 0, 0, 0],label:[1, 1, 3, 5]


epoch:0,batch:980,lr:0.001,loss:2.4231,mean_loss:2.297,mean_f1:0.09:  98%|█████████▊| 981/1000 [04:12<00:04,  3.83it/s]

preds:[0, 0, 0, 0],label:[6, 0, 2, 7]


epoch:0,batch:981,lr:0.001,loss:2.1901,mean_loss:2.297,mean_f1:0.09:  98%|█████████▊| 982/1000 [04:13<00:04,  3.82it/s]

preds:[0, 0, 0, 0],label:[2, 9, 0, 0]


epoch:0,batch:982,lr:0.001,loss:2.0758,mean_loss:2.297,mean_f1:0.09:  98%|█████████▊| 983/1000 [04:13<00:04,  3.84it/s]

preds:[0, 0, 0, 0],label:[1, 1, 3, 5]


epoch:0,batch:983,lr:0.001,loss:2.1636,mean_loss:2.297,mean_f1:0.09:  98%|█████████▊| 984/1000 [04:13<00:04,  3.91it/s]

preds:[0, 0, 0, 0],label:[1, 3, 4, 2]


epoch:0,batch:984,lr:0.001,loss:1.7308,mean_loss:2.296,mean_f1:0.09:  98%|█████████▊| 985/1000 [04:13<00:03,  3.96it/s]

preds:[1, 1, 1, 1],label:[1, 1, 3, 0]


epoch:0,batch:985,lr:0.001,loss:1.5082,mean_loss:2.295,mean_f1:0.09:  99%|█████████▊| 986/1000 [04:14<00:03,  3.95it/s]

preds:[1, 1, 1, 1],label:[0, 1, 1, 1]


epoch:0,batch:986,lr:0.001,loss:2.8433,mean_loss:2.296,mean_f1:0.09:  99%|█████████▊| 987/1000 [04:14<00:03,  3.94it/s]

preds:[1, 1, 1, 1],label:[8, 3, 3, 8]


epoch:0,batch:987,lr:0.001,loss:2.3055,mean_loss:2.296,mean_f1:0.09:  99%|█████████▉| 988/1000 [04:14<00:03,  3.93it/s]

preds:[1, 1, 1, 1],label:[5, 0, 8, 1]


epoch:0,batch:988,lr:0.001,loss:2.9364,mean_loss:2.296,mean_f1:0.09:  99%|█████████▉| 989/1000 [04:14<00:02,  3.87it/s]

preds:[1, 1, 1, 1],label:[8, 9, 2, 4]


epoch:0,batch:989,lr:0.001,loss:1.8188,mean_loss:2.296,mean_f1:0.09:  99%|█████████▉| 990/1000 [04:15<00:02,  3.88it/s]

preds:[1, 1, 1, 1],label:[5, 0, 1, 1]


epoch:0,batch:990,lr:0.001,loss:2.1239,mean_loss:2.296,mean_f1:0.091:  99%|████████▉| 991/1000 [04:15<00:02,  3.97it/s]

preds:[1, 1, 1, 1],label:[5, 1, 6, 1]


epoch:0,batch:991,lr:0.001,loss:2.0234,mean_loss:2.295,mean_f1:0.091:  99%|████████▉| 992/1000 [04:15<00:02,  3.98it/s]

preds:[1, 1, 1, 1],label:[0, 0, 1, 9]


epoch:0,batch:992,lr:0.001,loss:1.8946,mean_loss:2.295,mean_f1:0.091:  99%|████████▉| 993/1000 [04:15<00:01,  3.92it/s]

preds:[1, 1, 1, 1],label:[2, 0, 1, 2]


epoch:0,batch:993,lr:0.001,loss:2.3629,mean_loss:2.295,mean_f1:0.091:  99%|████████▉| 994/1000 [04:16<00:01,  3.98it/s]

preds:[1, 1, 1, 1],label:[0, 7, 6, 0]


epoch:0,batch:994,lr:0.001,loss:2.4472,mean_loss:2.295,mean_f1:0.091: 100%|████████▉| 995/1000 [04:16<00:01,  3.76it/s]

preds:[1, 1, 1, 1],label:[4, 11, 0, 1]


epoch:0,batch:995,lr:0.001,loss:1.8517,mean_loss:2.295,mean_f1:0.091: 100%|████████▉| 996/1000 [04:16<00:01,  3.85it/s]

preds:[1, 1, 1, 1],label:[1, 5, 0, 0]


epoch:0,batch:996,lr:0.001,loss:2.4206,mean_loss:2.295,mean_f1:0.09: 100%|█████████▉| 997/1000 [04:16<00:00,  3.89it/s]

preds:[1, 1, 1, 1],label:[2, 0, 9, 2]


epoch:0,batch:997,lr:0.001,loss:1.6441,mean_loss:2.294,mean_f1:0.091: 100%|████████▉| 998/1000 [04:17<00:00,  3.95it/s]

preds:[1, 1, 1, 1],label:[1, 0, 1, 2]


epoch:0,batch:998,lr:0.001,loss:1.9501,mean_loss:2.294,mean_f1:0.091: 100%|████████▉| 999/1000 [04:17<00:00,  3.89it/s]

preds:[1, 1, 1, 1],label:[0, 3, 2, 0]


epoch:0,batch:999,lr:0.001,loss:2.2416,mean_loss:2.294,mean_f1:0.091: 100%|████████| 1000/1000 [04:17<00:00,  3.88it/s]

preds:[1, 1, 1, 1],label:[1, 8, 4, 0]





　　训练不动，f1分数和损失都没有变化。初步判断是两个原因，一是文本数据的T太大，每个文本有59000多的长度，意味着一个GRU内部要进行接近6万次的传递才能输出最终的隐藏状态，在反向传播时，梯度越往前则改变越小，导致一个文本数据的前面一些文字对模型的影响越来越小，数据失去效果。二是数据的分类不均衡，导致模型只要预测为0、1、2、3就能获得相对来说还可以的结果，不需要进行学习了。  
　　对应的解决思路有三个，第一个问题有两个思路，思路一，先进行几次卷积，把Ｔ降下来，然后再给ｒｎｎ，思路二，对ｒｎｎ按照一定Ｔ距离截断反向传播，避免反向传播的梯度一直叠加，第二个问题，为ＣｒｏｓｓＥｎｔｒｏｐｙＬｏｓｓ加上权重，权重由数据集标签算得。  
　　先尝试第一个，在ｒｎｎ之前加卷积，把Ｔ给降下来。

In [1]:
import torch
import torch.nn as nn
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
import numpy as np
import pandas as pd
from tqdm import tqdm
import os

class MyDataset(Dataset):
    def __init__(self,csv_path):
        csv_data = pd.read_csv(csv_path,sep='\t')    
        self.text_data = csv_data.text[:4000]
        self.label_data = csv_data.label[:4000]
        
    def __getitem__(self,index):
        #所有text内的token索引增加1，0空出来代表空格，将每个text补充成57921长
        text_str = self.text_data[index]
        text_list = [int(x)+1 for x in text_str.split()]
        text_list.extend([0]*(57921-len(text_list)))
        text_array = np.array(text_list)
        label_int = self.label_data[index]
        return text_array,label_int
        
    def __len__(self):
        return len(self.text_data)

class MyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.embedding = nn.Embedding(7550,100)
        self.cnn = nn.Sequential(nn.Conv1d(57921,580,kernel_size=1),
                                 nn.Conv1d(580,58,kernel_size=1))
        
        self.rnn = nn.GRU(100,50,batch_first=True)
        self.fc = nn.Linear(50,14)
        
    def forward(self,X):
#         print('X:',X.shape)
        X = self.embedding(X)
#         print('embedding:',X.shape)
        X = self.cnn(X)
#         print('cnn:',X.shape)
        _,X = self.rnn(X)
#         print('rnn:',X.shape)
        X = X.squeeze(dim=0)
#         print('squeeze:',X.shape)
        y_hat = self.fc(X)
#         print('y_hat:',y_hat.shape)
        return y_hat

    
class MyTrain():
    def __init__(self,max_epoch=1,random_seed=1,lr=0.001,out_dir='./'):
        self.max_epoch = max_epoch
        self.random_seed = random_seed
        self.lr = lr
        self.out_dir = out_dir
        self.iter = 0
        
    def fix_random(self):
        import random
        import numpy as np
        import torch
        random.seed(self.random_seed)
        np.random.seed(self.random_seed)
        torch.random.manual_seed(self.random_seed)
        torch.cuda.random.manual_seed_all(self.random_seed)
        torch.backends.cudnn.deterministic = True
        print(f'random seed:{self.random_seed}')
        
    def my_train(self):
        
        max_epoch,lr = self.max_epoch,self.lr
        if self.random_seed is not None:
            self.fix_random()
        my_dataset = MyDataset('./train_set.csv')
        my_model = MyModel()
        my_model.train()          #将模型设置为训练模式
        my_optim = torch.optim.Adam(my_model.parameters(),lr=lr)
        my_loss = nn.CrossEntropyLoss()
        if torch.cuda.is_available():
            my_model.cuda()
            my_loss.cuda()
        print(f'train device:{next(iter(my_model.parameters())).device}')  #显示训练设备
        
        for epoch_index in range(max_epoch):
            loss_list = []
            f1_score_list = []
            best_f1_score = 0
            my_dataloader = DataLoader(my_dataset,batch_size=5,shuffle=True)
            my_dataloader = tqdm(my_dataloader)
            for batch_index,batch_data in enumerate(my_dataloader):
                batch_text,batch_label = batch_data
#                 print('batch_text:',batch_text.shape)
#                 print('batch_label:',batch_label)
                if torch.cuda.is_available():
                    batch_text = batch_text.cuda()
                    batch_label = batch_label.cuda()
                    
                y_hat = my_model(batch_text)
                batch_loss = my_loss(y_hat,batch_label)
                
                my_optim.zero_grad()
                batch_loss.backward()
#                 for i in my_model.parameters():
#                     print('grad:',i.grad)
                my_optim.step()
                my_optim.param_groups[0]['lr'] = lr*(0.8**((self.iter//1000)%10))
                
                #显示batch结果
                batch_lr = round(my_optim.param_groups[0]['lr'],5)
                batch_loss = round(batch_loss.item(),4)
                loss_list.append(batch_loss)
                mean_loss = round((sum(loss_list)/len(loss_list)),3)

                batch_f1_score = self.f1_score(y_hat.data,batch_label.data)
                f1_score_list.append(batch_f1_score)
                mean_f1 = round(sum(f1_score_list)/len(f1_score_list),3)
                my_dataloader.set_description(f'epoch:{epoch_index},batch:{batch_index},lr:{batch_lr},loss:{batch_loss},mean_loss:{mean_loss},mean_f1:{mean_f1}')
                
                #存储模型
                torch.save(my_model.state_dict(),os.path.join(self.out_dir,'embedding_gru_last'))                
                if batch_f1_score>best_f1_score:
                    torch.save(my_model.state_dict(),os.path.join(self.out_dir,'embedding_gru_best'))
                    best_f1_score = batch_f1_score
                    
                self.iter += 1
                
    def f1_score(self,y_hat,label,eps=1e-8):
        #y_hat(N,C),label(1)
        y_hat = y_hat.cpu()
        label = label.cpu()
        preds_list = list(torch.argmax(y_hat,dim=1).numpy())
        label_list = list(label.numpy())
        print(f'preds:{preds_list},label:{label_list}')
        class_index_list = []
        for class_index in label_list:
            if class_index not in class_index_list:
                class_index_list.append(class_index)

        f1_score_list = []
        for index in class_index_list:
            if index not in preds_list:
                sub_f1_score = 0
            else:
                tp = 0
                fp = 0
                fn = 0
                for i in range(len(preds_list)):
                    if preds_list[i] == index and label_list[i] == index:
                        tp+=1
                    if preds_list[i] == index and label_list[i] != index: 
                        fp+=1
                    if preds_list[i] != index and label_list[i] == index:
                        fn+=1
                prec_val = tp/(tp+fp) 
                recall_val = tp/(tp+fn)
                sub_f1_score = 2*(prec_val*recall_val)/(prec_val+recall_val+eps)
            f1_score_list.append(sub_f1_score)

        batch_f1_score = sum(f1_score_list)/len(f1_score_list)

        return batch_f1_score


In [2]:
if __name__ == '__main__':
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    MyTrain(max_epoch=1,random_seed=None).my_train()

train device:cuda:0


epoch:0,batch:0,lr:0.001,loss:2.6065,mean_loss:2.607,mean_f1:0.143:   0%|                      | 0/800 [00:01<?, ?it/s]

preds:[1, 1, 1, 1, 1],label:[4, 1, 8, 0, 1]


epoch:0,batch:1,lr:0.001,loss:2.8546,mean_loss:2.731,mean_f1:0.071:   0%|              | 1/800 [00:04<55:12,  4.15s/it]

preds:[3, 3, 3, 3, 3],label:[9, 2, 0, 0, 0]


epoch:0,batch:2,lr:0.001,loss:2.4917,mean_loss:2.651,mean_f1:0.075:   0%|              | 2/800 [00:05<31:26,  2.36s/it]

preds:[3, 3, 3, 3, 3],label:[4, 3, 2, 2, 6]


epoch:0,batch:3,lr:0.001,loss:2.5861,mean_loss:2.635,mean_f1:0.073:   0%|              | 3/800 [00:06<23:06,  1.74s/it]

preds:[0, 0, 0, 0, 0],label:[11, 2, 0, 1, 5]


epoch:0,batch:4,lr:0.001,loss:2.4494,mean_loss:2.598,mean_f1:0.075:   0%|              | 4/800 [00:07<19:07,  1.44s/it]

preds:[0, 0, 0, 0, 0],label:[9, 0, 1, 2, 1]


epoch:0,batch:5,lr:0.001,loss:2.5774,mean_loss:2.594,mean_f1:0.063:   1%|              | 5/800 [00:08<16:41,  1.26s/it]

preds:[0, 0, 0, 0, 0],label:[1, 2, 4, 4, 1]


epoch:0,batch:6,lr:0.001,loss:2.2601,mean_loss:2.547,mean_f1:0.07:   1%|               | 6/800 [00:09<15:24,  1.16s/it]

preds:[0, 0, 0, 0, 0],label:[0, 2, 1, 2, 2]


epoch:0,batch:7,lr:0.001,loss:2.466,mean_loss:2.536,mean_f1:0.071:   1%|▏              | 7/800 [00:10<15:00,  1.14s/it]

preds:[0, 0, 0, 0, 0],label:[2, 13, 6, 0, 2]


epoch:0,batch:8,lr:0.001,loss:2.3454,mean_loss:2.515,mean_f1:0.071:   1%|▏             | 8/800 [00:11<14:53,  1.13s/it]

preds:[2, 2, 2, 2, 2],label:[1, 2, 4, 11, 8]


epoch:0,batch:9,lr:0.001,loss:2.3219,mean_loss:2.496,mean_f1:0.072:   1%|▏             | 9/800 [00:12<14:09,  1.07s/it]

preds:[1, 1, 1, 1, 1],label:[6, 2, 0, 0, 1]


epoch:0,batch:10,lr:0.001,loss:2.5186,mean_loss:2.498,mean_f1:0.073:   1%|▏           | 10/800 [00:13<13:43,  1.04s/it]

preds:[1, 1, 1, 1, 1],label:[3, 1, 3, 0, 4]


epoch:0,batch:11,lr:0.001,loss:2.2422,mean_loss:2.477,mean_f1:0.074:   1%|▏           | 11/800 [00:14<13:20,  1.01s/it]

preds:[1, 1, 1, 1, 1],label:[2, 9, 1, 0, 0]


epoch:0,batch:12,lr:0.001,loss:3.1921,mean_loss:2.532,mean_f1:0.068:   2%|▏           | 12/800 [00:15<13:19,  1.01s/it]

preds:[1, 1, 1, 1, 1],label:[3, 5, 4, 4, 4]


epoch:0,batch:13,lr:0.001,loss:2.1323,mean_loss:2.503,mean_f1:0.074:   2%|▏           | 13/800 [00:16<13:12,  1.01s/it]

preds:[1, 1, 1, 1, 1],label:[8, 2, 1, 1, 0]


epoch:0,batch:14,lr:0.001,loss:2.6506,mean_loss:2.513,mean_f1:0.069:   2%|▏           | 14/800 [00:17<15:17,  1.17s/it]

preds:[1, 1, 1, 1, 1],label:[4, 2, 3, 2, 0]


epoch:0,batch:15,lr:0.001,loss:2.494,mean_loss:2.512,mean_f1:0.064:   2%|▏            | 15/800 [00:18<14:22,  1.10s/it]

preds:[1, 1, 1, 1, 1],label:[6, 2, 4, 0, 0]


epoch:0,batch:16,lr:0.001,loss:2.4843,mean_loss:2.51,mean_f1:0.065:   2%|▎            | 16/800 [00:19<14:07,  1.08s/it]

preds:[1, 1, 1, 1, 1],label:[2, 2, 1, 3, 4]


epoch:0,batch:17,lr:0.001,loss:2.5959,mean_loss:2.515,mean_f1:0.062:   2%|▎           | 17/800 [00:20<13:31,  1.04s/it]

preds:[1, 1, 1, 1, 1],label:[3, 3, 6, 4, 0]


epoch:0,batch:18,lr:0.001,loss:3.0231,mean_loss:2.542,mean_f1:0.059:   2%|▎           | 18/800 [00:21<13:09,  1.01s/it]

preds:[1, 1, 1, 1, 1],label:[7, 2, 2, 3, 3]


epoch:0,batch:19,lr:0.001,loss:2.3095,mean_loss:2.53,mean_f1:0.06:   2%|▎             | 19/800 [00:22<12:52,  1.01it/s]

preds:[1, 1, 1, 1, 1],label:[1, 3, 3, 2, 8]


epoch:0,batch:20,lr:0.001,loss:2.308,mean_loss:2.52,mean_f1:0.057:   2%|▎             | 20/800 [00:23<12:39,  1.03it/s]

preds:[1, 1, 1, 1, 1],label:[2, 9, 0, 2, 6]


epoch:0,batch:21,lr:0.001,loss:2.4854,mean_loss:2.518,mean_f1:0.054:   3%|▎           | 21/800 [00:24<12:27,  1.04it/s]

preds:[1, 1, 1, 1, 1],label:[2, 2, 2, 13, 6]


epoch:0,batch:22,lr:0.001,loss:2.6895,mean_loss:2.525,mean_f1:0.052:   3%|▎           | 22/800 [00:25<12:23,  1.05it/s]

preds:[1, 1, 1, 1, 1],label:[5, 0, 2, 5, 2]


epoch:0,batch:23,lr:0.001,loss:2.8134,mean_loss:2.537,mean_f1:0.05:   3%|▎            | 23/800 [00:26<12:19,  1.05it/s]

preds:[1, 1, 1, 1, 1],label:[7, 8, 0, 8, 3]


epoch:0,batch:24,lr:0.001,loss:3.1263,mean_loss:2.561,mean_f1:0.051:   3%|▎           | 24/800 [00:27<12:15,  1.06it/s]

preds:[2, 2, 2, 2, 2],label:[7, 0, 8, 7, 2]


epoch:0,batch:25,lr:0.001,loss:2.3294,mean_loss:2.552,mean_f1:0.049:   3%|▍           | 25/800 [00:28<12:17,  1.05it/s]

preds:[2, 2, 2, 2, 2],label:[8, 3, 1, 3, 8]


epoch:0,batch:26,lr:0.001,loss:2.0931,mean_loss:2.535,mean_f1:0.054:   3%|▍           | 26/800 [00:29<12:27,  1.04it/s]

preds:[2, 2, 2, 2, 2],label:[1, 2, 2, 1, 4]


epoch:0,batch:27,lr:0.001,loss:2.9922,mean_loss:2.551,mean_f1:0.053:   3%|▍           | 27/800 [00:31<16:02,  1.25s/it]

preds:[2, 2, 2, 2, 2],label:[4, 3, 7, 4, 8]


epoch:0,batch:28,lr:0.001,loss:2.4255,mean_loss:2.547,mean_f1:0.053:   4%|▍           | 28/800 [00:32<15:04,  1.17s/it]

preds:[2, 2, 2, 2, 2],label:[4, 0, 3, 2, 5]


epoch:0,batch:29,lr:0.001,loss:2.1478,mean_loss:2.534,mean_f1:0.055:   4%|▍           | 29/800 [00:33<14:09,  1.10s/it]

preds:[2, 2, 2, 2, 2],label:[2, 1, 1, 1, 4]


epoch:0,batch:30,lr:0.001,loss:2.2532,mean_loss:2.525,mean_f1:0.056:   4%|▍           | 30/800 [00:34<13:34,  1.06s/it]

preds:[2, 2, 2, 2, 2],label:[3, 3, 4, 1, 2]


epoch:0,batch:31,lr:0.001,loss:2.0151,mean_loss:2.509,mean_f1:0.059:   4%|▍           | 31/800 [00:35<13:22,  1.04s/it]

preds:[2, 2, 2, 2, 2],label:[1, 8, 0, 2, 2]


epoch:0,batch:32,lr:0.001,loss:2.5112,mean_loss:2.509,mean_f1:0.057:   4%|▍           | 32/800 [00:36<12:55,  1.01s/it]

preds:[2, 2, 2, 2, 2],label:[0, 1, 0, 13, 4]


epoch:0,batch:33,lr:0.001,loss:2.5779,mean_loss:2.511,mean_f1:0.055:   4%|▍           | 33/800 [00:37<12:40,  1.01it/s]

preds:[2, 2, 2, 2, 2],label:[1, 7, 1, 0, 0]


epoch:0,batch:34,lr:0.001,loss:2.113,mean_loss:2.5,mean_f1:0.057:   4%|▋              | 34/800 [00:38<12:36,  1.01it/s]

preds:[2, 2, 2, 2, 2],label:[2, 1, 1, 0, 0]


epoch:0,batch:35,lr:0.001,loss:3.1288,mean_loss:2.517,mean_f1:0.055:   4%|▌           | 35/800 [00:38<12:24,  1.03it/s]

preds:[2, 2, 2, 2, 2],label:[12, 4, 11, 7, 0]


epoch:0,batch:36,lr:0.001,loss:2.3,mean_loss:2.511,mean_f1:0.054:   4%|▋              | 36/800 [00:39<12:15,  1.04it/s]

preds:[2, 2, 2, 2, 2],label:[8, 3, 4, 1, 4]


epoch:0,batch:37,lr:0.001,loss:2.5945,mean_loss:2.513,mean_f1:0.054:   5%|▌           | 37/800 [00:40<12:18,  1.03it/s]

preds:[2, 2, 2, 2, 2],label:[1, 7, 2, 5, 0]


epoch:0,batch:38,lr:0.001,loss:2.2832,mean_loss:2.507,mean_f1:0.055:   5%|▌           | 38/800 [00:41<12:28,  1.02it/s]

preds:[2, 2, 2, 2, 2],label:[0, 5, 0, 0, 2]


epoch:0,batch:39,lr:0.001,loss:2.3248,mean_loss:2.503,mean_f1:0.054:   5%|▌           | 39/800 [00:42<12:16,  1.03it/s]

preds:[2, 2, 2, 2, 2],label:[0, 1, 9, 1, 1]


epoch:0,batch:40,lr:0.001,loss:2.6535,mean_loss:2.507,mean_f1:0.053:   5%|▌           | 40/800 [00:43<12:17,  1.03it/s]

preds:[2, 2, 2, 2, 2],label:[6, 1, 5, 5, 4]


epoch:0,batch:41,lr:0.001,loss:2.0797,mean_loss:2.496,mean_f1:0.054:   5%|▌           | 41/800 [00:44<12:10,  1.04it/s]

preds:[2, 2, 2, 2, 2],label:[1, 1, 3, 2, 3]


epoch:0,batch:42,lr:0.001,loss:2.9002,mean_loss:2.506,mean_f1:0.053:   5%|▋           | 42/800 [00:45<12:16,  1.03it/s]

preds:[2, 2, 2, 2, 2],label:[6, 4, 6, 5, 13]


epoch:0,batch:43,lr:0.001,loss:2.8357,mean_loss:2.513,mean_f1:0.052:   5%|▋           | 43/800 [00:46<12:06,  1.04it/s]

preds:[2, 2, 2, 2, 2],label:[9, 4, 4, 3, 7]


epoch:0,batch:44,lr:0.001,loss:2.0957,mean_loss:2.504,mean_f1:0.052:   6%|▋           | 44/800 [00:47<12:00,  1.05it/s]

preds:[2, 2, 2, 2, 2],label:[3, 3, 0, 2, 1]


epoch:0,batch:45,lr:0.001,loss:1.9492,mean_loss:2.492,mean_f1:0.057:   6%|▋           | 45/800 [00:48<11:57,  1.05it/s]

preds:[2, 2, 2, 2, 2],label:[3, 2, 2, 2, 4]


epoch:0,batch:46,lr:0.001,loss:2.7341,mean_loss:2.497,mean_f1:0.055:   6%|▋           | 46/800 [00:50<15:24,  1.23s/it]

preds:[2, 2, 2, 2, 2],label:[0, 1, 5, 4, 10]


epoch:0,batch:47,lr:0.001,loss:2.6558,mean_loss:2.5,mean_f1:0.054:   6%|▊             | 47/800 [00:51<14:34,  1.16s/it]

preds:[2, 2, 2, 2, 2],label:[6, 11, 1, 6, 0]


epoch:0,batch:48,lr:0.001,loss:2.2719,mean_loss:2.496,mean_f1:0.057:   6%|▋           | 48/800 [00:52<13:41,  1.09s/it]

preds:[2, 2, 2, 2, 2],label:[2, 5, 0, 5, 2]


epoch:0,batch:49,lr:0.001,loss:2.1975,mean_loss:2.49,mean_f1:0.061:   6%|▊            | 49/800 [00:53<13:05,  1.05s/it]

preds:[2, 2, 2, 2, 2],label:[12, 2, 1, 2, 2]


epoch:0,batch:50,lr:0.001,loss:3.271,mean_loss:2.505,mean_f1:0.06:   6%|▉             | 50/800 [00:54<12:37,  1.01s/it]

preds:[2, 2, 2, 2, 2],label:[0, 1, 11, 10, 10]


epoch:0,batch:51,lr:0.001,loss:2.5531,mean_loss:2.506,mean_f1:0.059:   6%|▊           | 51/800 [00:55<12:14,  1.02it/s]

preds:[2, 2, 2, 2, 2],label:[4, 6, 1, 3, 9]


epoch:0,batch:52,lr:0.001,loss:2.6425,mean_loss:2.509,mean_f1:0.059:   6%|▊           | 52/800 [00:56<12:04,  1.03it/s]

preds:[2, 2, 2, 2, 2],label:[1, 10, 6, 3, 2]


epoch:0,batch:53,lr:0.001,loss:1.9473,mean_loss:2.498,mean_f1:0.061:   7%|▊           | 53/800 [00:57<12:24,  1.00it/s]

preds:[2, 2, 2, 2, 2],label:[1, 4, 2, 4, 2]


epoch:0,batch:54,lr:0.001,loss:1.9132,mean_loss:2.488,mean_f1:0.065:   7%|▊           | 54/800 [00:58<12:10,  1.02it/s]

preds:[2, 2, 2, 2, 2],label:[3, 2, 2, 0, 2]


epoch:0,batch:55,lr:0.001,loss:1.9976,mean_loss:2.479,mean_f1:0.065:   7%|▊           | 55/800 [00:59<12:07,  1.02it/s]

preds:[2, 2, 2, 2, 2],label:[2, 0, 3, 0, 1]


epoch:0,batch:56,lr:0.001,loss:2.0555,mean_loss:2.471,mean_f1:0.065:   7%|▊           | 56/800 [00:59<11:59,  1.03it/s]

preds:[2, 2, 2, 2, 2],label:[0, 1, 2, 6, 1]


epoch:0,batch:57,lr:0.001,loss:2.1729,mean_loss:2.466,mean_f1:0.064:   7%|▊           | 57/800 [01:01<12:17,  1.01it/s]

preds:[2, 2, 2, 2, 2],label:[1, 6, 3, 1, 0]


epoch:0,batch:58,lr:0.001,loss:2.5427,mean_loss:2.468,mean_f1:0.063:   7%|▊           | 58/800 [01:01<12:04,  1.02it/s]

preds:[2, 2, 2, 2, 2],label:[1, 12, 0, 0, 5]


epoch:0,batch:59,lr:0.001,loss:2.1145,mean_loss:2.462,mean_f1:0.064:   7%|▉           | 59/800 [01:03<12:27,  1.01s/it]

preds:[2, 2, 2, 2, 2],label:[0, 2, 3, 5, 2]


epoch:0,batch:60,lr:0.001,loss:3.0431,mean_loss:2.471,mean_f1:0.063:   8%|▉           | 60/800 [01:04<12:17,  1.00it/s]

preds:[2, 2, 2, 2, 2],label:[5, 10, 5, 9, 1]


epoch:0,batch:61,lr:0.001,loss:2.3851,mean_loss:2.47,mean_f1:0.062:   8%|▉            | 61/800 [01:04<11:59,  1.03it/s]

preds:[2, 2, 2, 2, 2],label:[9, 3, 0, 1, 3]


epoch:0,batch:62,lr:0.001,loss:2.4782,mean_loss:2.47,mean_f1:0.061:   8%|█            | 62/800 [01:05<11:46,  1.05it/s]

preds:[2, 2, 2, 2, 2],label:[1, 1, 3, 13, 5]


epoch:0,batch:63,lr:0.001,loss:2.3741,mean_loss:2.468,mean_f1:0.061:   8%|▉           | 63/800 [01:06<11:43,  1.05it/s]

preds:[2, 2, 2, 2, 2],label:[4, 0, 2, 8, 6]


epoch:0,batch:64,lr:0.001,loss:2.0798,mean_loss:2.462,mean_f1:0.063:   8%|▉           | 64/800 [01:07<11:41,  1.05it/s]

preds:[2, 2, 2, 2, 2],label:[9, 1, 2, 1, 2]


epoch:0,batch:65,lr:0.001,loss:2.0572,mean_loss:2.456,mean_f1:0.064:   8%|▉           | 65/800 [01:08<11:33,  1.06it/s]

preds:[2, 2, 2, 2, 2],label:[4, 2, 0, 3, 0]


epoch:0,batch:66,lr:0.001,loss:2.251,mean_loss:2.453,mean_f1:0.063:   8%|█            | 66/800 [01:09<11:43,  1.04it/s]

preds:[2, 2, 2, 2, 2],label:[0, 10, 1, 1, 1]


epoch:0,batch:67,lr:0.001,loss:2.0724,mean_loss:2.448,mean_f1:0.063:   8%|█           | 67/800 [01:10<11:42,  1.04it/s]

preds:[1, 1, 1, 1, 1],label:[4, 1, 0, 0, 4]


epoch:0,batch:68,lr:0.001,loss:2.2812,mean_loss:2.445,mean_f1:0.064:   8%|█           | 68/800 [01:11<11:41,  1.04it/s]

preds:[1, 1, 1, 1, 1],label:[5, 2, 1, 4, 6]


epoch:0,batch:69,lr:0.001,loss:2.6335,mean_loss:2.448,mean_f1:0.063:   9%|█           | 69/800 [01:12<11:34,  1.05it/s]

preds:[1, 1, 1, 1, 1],label:[3, 0, 7, 8, 0]


epoch:0,batch:70,lr:0.001,loss:2.5378,mean_loss:2.449,mean_f1:0.063:   9%|█           | 70/800 [01:13<11:31,  1.06it/s]

preds:[1, 1, 1, 1, 1],label:[6, 4, 1, 7, 0]


epoch:0,batch:71,lr:0.001,loss:2.2288,mean_loss:2.446,mean_f1:0.064:   9%|█           | 71/800 [01:14<11:31,  1.05it/s]

preds:[1, 1, 1, 1, 1],label:[5, 1, 1, 8, 0]


epoch:0,batch:72,lr:0.001,loss:3.053,mean_loss:2.454,mean_f1:0.064:   9%|█▏           | 72/800 [01:15<11:28,  1.06it/s]

preds:[1, 1, 1, 1, 1],label:[11, 3, 9, 1, 12]


epoch:0,batch:73,lr:0.001,loss:2.2095,mean_loss:2.451,mean_f1:0.064:   9%|█           | 73/800 [01:16<11:31,  1.05it/s]

preds:[1, 1, 1, 1, 1],label:[4, 0, 6, 3, 1]


epoch:0,batch:74,lr:0.001,loss:2.2883,mean_loss:2.449,mean_f1:0.063:   9%|█           | 74/800 [01:17<11:26,  1.06it/s]

preds:[1, 1, 1, 1, 1],label:[2, 0, 0, 0, 10]


epoch:0,batch:75,lr:0.001,loss:2.2304,mean_loss:2.446,mean_f1:0.064:   9%|█▏          | 75/800 [01:18<11:28,  1.05it/s]

preds:[1, 1, 1, 1, 1],label:[2, 1, 6, 8, 1]


epoch:0,batch:76,lr:0.001,loss:2.5405,mean_loss:2.447,mean_f1:0.063:  10%|█▏          | 76/800 [01:19<11:29,  1.05it/s]

preds:[1, 1, 1, 1, 1],label:[2, 9, 2, 5, 5]


epoch:0,batch:77,lr:0.001,loss:2.5963,mean_loss:2.449,mean_f1:0.062:  10%|█▏          | 77/800 [01:20<11:27,  1.05it/s]

preds:[1, 1, 1, 1, 1],label:[7, 0, 3, 8, 0]


epoch:0,batch:78,lr:0.001,loss:2.7712,mean_loss:2.453,mean_f1:0.062:  10%|█▏          | 78/800 [01:21<11:26,  1.05it/s]

preds:[1, 1, 1, 1, 1],label:[13, 4, 5, 4, 3]


epoch:0,batch:79,lr:0.001,loss:2.1118,mean_loss:2.449,mean_f1:0.064:  10%|█▏          | 79/800 [01:22<11:23,  1.05it/s]

preds:[1, 1, 1, 1, 1],label:[1, 2, 1, 7, 1]


epoch:0,batch:80,lr:0.001,loss:2.0855,mean_loss:2.445,mean_f1:0.064:  10%|█▏          | 80/800 [01:22<11:23,  1.05it/s]

preds:[1, 1, 1, 1, 1],label:[2, 4, 1, 5, 0]


epoch:0,batch:81,lr:0.001,loss:2.5129,mean_loss:2.445,mean_f1:0.063:  10%|█▏          | 81/800 [01:23<11:35,  1.03it/s]

preds:[1, 1, 1, 1, 1],label:[0, 10, 3, 0, 5]


epoch:0,batch:82,lr:0.001,loss:2.301,mean_loss:2.444,mean_f1:0.063:  10%|█▎           | 82/800 [01:24<11:31,  1.04it/s]

preds:[1, 1, 1, 1, 1],label:[2, 1, 3, 11, 0]


epoch:0,batch:83,lr:0.001,loss:2.7226,mean_loss:2.447,mean_f1:0.063:  10%|█▏          | 83/800 [01:25<11:42,  1.02it/s]

preds:[1, 1, 1, 1, 1],label:[4, 0, 7, 1, 12]


epoch:0,batch:84,lr:0.001,loss:2.1729,mean_loss:2.444,mean_f1:0.063:  10%|█▎          | 84/800 [01:27<12:00,  1.01s/it]

preds:[1, 1, 1, 1, 1],label:[2, 0, 3, 6, 2]


epoch:0,batch:85,lr:0.001,loss:1.7871,mean_loss:2.436,mean_f1:0.065:  11%|█▎          | 85/800 [01:28<11:58,  1.00s/it]

preds:[1, 1, 1, 1, 1],label:[1, 1, 1, 3, 0]


epoch:0,batch:86,lr:0.001,loss:2.6324,mean_loss:2.438,mean_f1:0.064:  11%|█▎          | 86/800 [01:29<12:41,  1.07s/it]

preds:[1, 1, 1, 1, 1],label:[4, 6, 6, 8, 0]


epoch:0,batch:87,lr:0.001,loss:2.7479,mean_loss:2.442,mean_f1:0.063:  11%|█▎          | 87/800 [01:30<12:26,  1.05s/it]

preds:[1, 1, 1, 1, 1],label:[2, 0, 7, 6, 9]


epoch:0,batch:88,lr:0.001,loss:2.5349,mean_loss:2.443,mean_f1:0.063:  11%|█▎          | 88/800 [01:31<12:09,  1.02s/it]

preds:[1, 1, 1, 1, 1],label:[3, 5, 5, 5, 3]


epoch:0,batch:89,lr:0.001,loss:2.2468,mean_loss:2.441,mean_f1:0.063:  11%|█▎          | 89/800 [01:32<11:59,  1.01s/it]

preds:[1, 1, 1, 1, 1],label:[0, 9, 4, 1, 2]


epoch:0,batch:90,lr:0.001,loss:2.121,mean_loss:2.437,mean_f1:0.064:  11%|█▍           | 90/800 [01:33<11:40,  1.01it/s]

preds:[1, 1, 1, 1, 1],label:[1, 0, 0, 10, 1]


epoch:0,batch:91,lr:0.001,loss:1.9345,mean_loss:2.432,mean_f1:0.064:  11%|█▎          | 91/800 [01:34<11:31,  1.03it/s]

preds:[1, 1, 1, 1, 1],label:[2, 0, 2, 1, 3]


epoch:0,batch:92,lr:0.001,loss:2.7602,mean_loss:2.435,mean_f1:0.064:  12%|█▍          | 92/800 [01:35<11:41,  1.01it/s]

preds:[1, 1, 1, 1, 1],label:[3, 3, 7, 1, 11]


epoch:0,batch:93,lr:0.001,loss:1.9624,mean_loss:2.43,mean_f1:0.066:  12%|█▌           | 93/800 [01:36<11:31,  1.02it/s]

preds:[1, 1, 1, 1, 1],label:[2, 1, 1, 2, 4]


epoch:0,batch:94,lr:0.001,loss:2.7467,mean_loss:2.434,mean_f1:0.066:  12%|█▍          | 94/800 [01:37<11:37,  1.01it/s]

preds:[1, 1, 1, 1, 1],label:[2, 3, 10, 1, 10]


epoch:0,batch:95,lr:0.001,loss:2.0404,mean_loss:2.429,mean_f1:0.065:  12%|█▍          | 95/800 [01:38<11:35,  1.01it/s]

preds:[1, 1, 1, 1, 1],label:[2, 4, 0, 2, 2]


epoch:0,batch:96,lr:0.001,loss:1.9761,mean_loss:2.425,mean_f1:0.065:  12%|█▍          | 96/800 [01:38<11:27,  1.02it/s]

preds:[1, 1, 1, 1, 1],label:[3, 1, 2, 3, 0]


epoch:0,batch:97,lr:0.001,loss:2.397,mean_loss:2.425,mean_f1:0.065:  12%|█▌           | 97/800 [01:39<11:24,  1.03it/s]

preds:[1, 1, 1, 1, 1],label:[5, 3, 2, 1, 9]


epoch:0,batch:98,lr:0.001,loss:2.5068,mean_loss:2.425,mean_f1:0.065:  12%|█▍          | 98/800 [01:40<11:16,  1.04it/s]

preds:[1, 1, 1, 1, 1],label:[9, 4, 3, 0, 5]


epoch:0,batch:99,lr:0.001,loss:2.2606,mean_loss:2.424,mean_f1:0.064:  12%|█▍          | 99/800 [01:41<11:25,  1.02it/s]

preds:[1, 1, 1, 1, 1],label:[0, 3, 0, 3, 9]


epoch:0,batch:100,lr:0.001,loss:2.2855,mean_loss:2.422,mean_f1:0.064:  12%|█▎        | 100/800 [01:42<11:18,  1.03it/s]

preds:[1, 1, 1, 1, 1],label:[1, 3, 0, 4, 8]


epoch:0,batch:101,lr:0.001,loss:2.4699,mean_loss:2.423,mean_f1:0.064:  13%|█▎        | 101/800 [01:43<11:09,  1.04it/s]

preds:[1, 1, 1, 1, 1],label:[12, 3, 3, 3, 0]


epoch:0,batch:102,lr:0.001,loss:2.4812,mean_loss:2.423,mean_f1:0.063:  13%|█▎        | 102/800 [01:44<11:01,  1.06it/s]

preds:[1, 1, 1, 1, 1],label:[6, 4, 4, 2, 4]


epoch:0,batch:103,lr:0.001,loss:3.1211,mean_loss:2.43,mean_f1:0.062:  13%|█▍         | 103/800 [01:45<11:01,  1.05it/s]

preds:[1, 1, 1, 1, 1],label:[3, 11, 4, 10, 8]


epoch:0,batch:104,lr:0.001,loss:2.1243,mean_loss:2.427,mean_f1:0.062:  13%|█▎        | 104/800 [01:46<10:56,  1.06it/s]

preds:[1, 1, 1, 1, 1],label:[3, 0, 0, 9, 0]


epoch:0,batch:105,lr:0.001,loss:2.0212,mean_loss:2.423,mean_f1:0.062:  13%|█▎        | 105/800 [01:47<10:53,  1.06it/s]

preds:[0, 0, 0, 0, 0],label:[5, 1, 0, 2, 2]


epoch:0,batch:106,lr:0.001,loss:2.2305,mean_loss:2.422,mean_f1:0.062:  13%|█▎        | 106/800 [01:48<10:52,  1.06it/s]

preds:[0, 0, 0, 0, 0],label:[4, 0, 3, 6, 3]


epoch:0,batch:107,lr:0.001,loss:2.405,mean_loss:2.421,mean_f1:0.063:  13%|█▍         | 107/800 [01:49<11:00,  1.05it/s]

preds:[0, 0, 0, 0, 0],label:[5, 11, 3, 0, 0]


epoch:0,batch:108,lr:0.001,loss:3.0031,mean_loss:2.427,mean_f1:0.062:  14%|█▎        | 108/800 [01:50<10:57,  1.05it/s]

preds:[0, 0, 0, 0, 0],label:[1, 2, 11, 10, 7]


epoch:0,batch:109,lr:0.001,loss:2.286,mean_loss:2.425,mean_f1:0.063:  14%|█▍         | 109/800 [01:51<11:08,  1.03it/s]

preds:[0, 0, 0, 0, 0],label:[1, 1, 0, 12, 1]


epoch:0,batch:110,lr:0.001,loss:2.4962,mean_loss:2.426,mean_f1:0.062:  14%|█▍        | 110/800 [01:52<11:12,  1.03it/s]

preds:[0, 0, 0, 0, 0],label:[5, 4, 1, 1, 7]


epoch:0,batch:111,lr:0.001,loss:2.2035,mean_loss:2.424,mean_f1:0.062:  14%|█▍        | 111/800 [01:53<11:08,  1.03it/s]

preds:[0, 0, 0, 0, 0],label:[4, 1, 4, 3, 4]


epoch:0,batch:112,lr:0.001,loss:2.5186,mean_loss:2.425,mean_f1:0.062:  14%|█▍        | 112/800 [01:54<11:04,  1.04it/s]

preds:[0, 0, 0, 0, 0],label:[7, 0, 8, 1, 2]


epoch:0,batch:113,lr:0.001,loss:2.5525,mean_loss:2.426,mean_f1:0.062:  14%|█▍        | 113/800 [01:55<11:01,  1.04it/s]

preds:[0, 0, 0, 0, 0],label:[8, 1, 6, 0, 5]


epoch:0,batch:114,lr:0.001,loss:2.5109,mean_loss:2.427,mean_f1:0.061:  14%|█▍        | 114/800 [01:56<10:58,  1.04it/s]

preds:[0, 0, 0, 0, 0],label:[3, 1, 2, 8, 8]


epoch:0,batch:115,lr:0.001,loss:1.981,mean_loss:2.423,mean_f1:0.061:  14%|█▌         | 115/800 [01:57<11:02,  1.03it/s]

preds:[0, 0, 0, 0, 0],label:[3, 4, 3, 2, 0]


epoch:0,batch:116,lr:0.001,loss:2.2133,mean_loss:2.421,mean_f1:0.061:  14%|█▍        | 116/800 [01:58<10:59,  1.04it/s]

preds:[0, 0, 0, 0, 0],label:[0, 4, 2, 6, 1]


epoch:0,batch:117,lr:0.001,loss:2.8637,mean_loss:2.425,mean_f1:0.061:  15%|█▍        | 117/800 [01:59<10:55,  1.04it/s]

preds:[0, 0, 0, 0, 0],label:[7, 11, 1, 1, 8]


epoch:0,batch:118,lr:0.001,loss:1.9553,mean_loss:2.421,mean_f1:0.061:  15%|█▍        | 118/800 [02:00<10:49,  1.05it/s]

preds:[0, 0, 0, 0, 0],label:[0, 2, 1, 2, 2]


epoch:0,batch:119,lr:0.001,loss:2.1506,mean_loss:2.419,mean_f1:0.062:  15%|█▍        | 119/800 [02:00<10:46,  1.05it/s]

preds:[0, 0, 0, 0, 0],label:[8, 1, 0, 1, 2]


epoch:0,batch:120,lr:0.001,loss:2.4472,mean_loss:2.419,mean_f1:0.061:  15%|█▌        | 120/800 [02:01<10:42,  1.06it/s]

preds:[0, 0, 0, 0, 0],label:[4, 2, 7, 4, 2]


epoch:0,batch:121,lr:0.001,loss:2.4523,mean_loss:2.419,mean_f1:0.061:  15%|█▌        | 121/800 [02:02<10:42,  1.06it/s]

preds:[0, 0, 0, 0, 0],label:[7, 3, 5, 0, 4]


epoch:0,batch:122,lr:0.001,loss:2.2028,mean_loss:2.417,mean_f1:0.061:  15%|█▌        | 122/800 [02:03<10:40,  1.06it/s]

preds:[0, 0, 0, 0, 0],label:[2, 1, 6, 0, 4]


epoch:0,batch:123,lr:0.001,loss:2.2472,mean_loss:2.416,mean_f1:0.062:  15%|█▌        | 123/800 [02:04<10:40,  1.06it/s]

preds:[1, 1, 1, 1, 1],label:[1, 1, 3, 4, 9]


epoch:0,batch:124,lr:0.001,loss:2.2051,mean_loss:2.414,mean_f1:0.062:  16%|█▌        | 124/800 [02:05<11:23,  1.01s/it]

preds:[1, 1, 1, 1, 1],label:[4, 2, 8, 1, 1]


epoch:0,batch:125,lr:0.001,loss:2.6455,mean_loss:2.416,mean_f1:0.062:  16%|█▌        | 125/800 [02:06<11:11,  1.01it/s]

preds:[1, 1, 1, 1, 1],label:[1, 0, 13, 2, 6]


epoch:0,batch:126,lr:0.001,loss:1.8688,mean_loss:2.412,mean_f1:0.064:  16%|█▌        | 126/800 [02:07<11:11,  1.00it/s]

preds:[1, 1, 1, 1, 1],label:[1, 1, 0, 4, 1]


epoch:0,batch:127,lr:0.001,loss:2.427,mean_loss:2.412,mean_f1:0.063:  16%|█▋         | 127/800 [02:08<11:06,  1.01it/s]

preds:[1, 1, 1, 1, 1],label:[2, 0, 8, 3, 8]


epoch:0,batch:128,lr:0.001,loss:1.9968,mean_loss:2.409,mean_f1:0.064:  16%|█▌        | 128/800 [02:09<10:58,  1.02it/s]

preds:[1, 1, 1, 1, 1],label:[0, 4, 4, 2, 1]


epoch:0,batch:129,lr:0.001,loss:2.2616,mean_loss:2.408,mean_f1:0.063:  16%|█▌        | 129/800 [02:10<10:52,  1.03it/s]

preds:[1, 1, 1, 1, 1],label:[4, 2, 2, 6, 3]


epoch:0,batch:130,lr:0.001,loss:2.4202,mean_loss:2.408,mean_f1:0.063:  16%|█▋        | 130/800 [02:11<10:43,  1.04it/s]

preds:[1, 1, 1, 1, 1],label:[6, 3, 2, 0, 8]


epoch:0,batch:131,lr:0.001,loss:2.0343,mean_loss:2.405,mean_f1:0.062:  16%|█▋        | 131/800 [02:12<10:36,  1.05it/s]

preds:[1, 1, 1, 1, 1],label:[2, 3, 4, 3, 0]


epoch:0,batch:132,lr:0.001,loss:1.8147,mean_loss:2.401,mean_f1:0.064:  16%|█▋        | 132/800 [02:13<10:33,  1.05it/s]

preds:[1, 1, 1, 1, 1],label:[0, 0, 1, 1, 0]


epoch:0,batch:133,lr:0.001,loss:3.0049,mean_loss:2.405,mean_f1:0.063:  17%|█▋        | 133/800 [02:15<13:26,  1.21s/it]

preds:[1, 1, 1, 1, 1],label:[10, 5, 5, 7, 0]


epoch:0,batch:134,lr:0.001,loss:1.9973,mean_loss:2.402,mean_f1:0.063:  17%|█▋        | 134/800 [02:16<12:29,  1.13s/it]

preds:[1, 1, 1, 1, 1],label:[0, 4, 2, 2, 3]


epoch:0,batch:135,lr:0.001,loss:2.2236,mean_loss:2.401,mean_f1:0.062:  17%|█▋        | 135/800 [02:17<11:55,  1.08s/it]

preds:[1, 1, 1, 1, 1],label:[0, 6, 3, 4, 2]


epoch:0,batch:136,lr:0.001,loss:2.2269,mean_loss:2.399,mean_f1:0.063:  17%|█▋        | 136/800 [02:18<11:45,  1.06s/it]

preds:[1, 1, 1, 1, 1],label:[3, 6, 4, 3, 1]


epoch:0,batch:137,lr:0.001,loss:2.0787,mean_loss:2.397,mean_f1:0.063:  17%|█▋        | 137/800 [02:19<12:00,  1.09s/it]

preds:[1, 1, 1, 1, 1],label:[1, 3, 5, 0, 1]


epoch:0,batch:138,lr:0.001,loss:2.1095,mean_loss:2.395,mean_f1:0.064:  17%|█▋        | 138/800 [02:20<11:52,  1.08s/it]

preds:[1, 1, 1, 1, 1],label:[7, 0, 0, 1, 1]


epoch:0,batch:139,lr:0.001,loss:2.3083,mean_loss:2.394,mean_f1:0.064:  17%|█▋        | 139/800 [02:21<11:22,  1.03s/it]

preds:[1, 1, 1, 1, 1],label:[5, 0, 0, 0, 8]


epoch:0,batch:140,lr:0.001,loss:1.8035,mean_loss:2.39,mean_f1:0.064:  18%|█▉         | 140/800 [02:22<11:04,  1.01s/it]

preds:[1, 1, 1, 1, 1],label:[1, 0, 0, 2, 1]


epoch:0,batch:141,lr:0.001,loss:2.1477,mean_loss:2.389,mean_f1:0.065:  18%|█▊        | 141/800 [02:23<10:57,  1.00it/s]

preds:[1, 1, 1, 1, 1],label:[3, 5, 4, 1, 0]


epoch:0,batch:142,lr:0.001,loss:2.4138,mean_loss:2.389,mean_f1:0.064:  18%|█▊        | 142/800 [02:24<11:02,  1.01s/it]

preds:[1, 1, 1, 1, 1],label:[2, 2, 0, 8, 7]


epoch:0,batch:143,lr:0.001,loss:2.0732,mean_loss:2.386,mean_f1:0.064:  18%|█▊        | 143/800 [02:25<10:59,  1.00s/it]

preds:[1, 1, 1, 1, 1],label:[0, 5, 2, 0, 2]


epoch:0,batch:144,lr:0.001,loss:2.4219,mean_loss:2.387,mean_f1:0.064:  18%|█▊        | 144/800 [02:26<11:20,  1.04s/it]

preds:[0, 0, 0, 0, 0],label:[0, 13, 2, 3, 0]


epoch:0,batch:145,lr:0.001,loss:1.8379,mean_loss:2.383,mean_f1:0.065:  18%|█▊        | 145/800 [02:28<15:23,  1.41s/it]

preds:[0, 0, 0, 0, 0],label:[2, 3, 0, 1, 0]


epoch:0,batch:146,lr:0.001,loss:2.319,mean_loss:2.383,mean_f1:0.064:  18%|██         | 146/800 [02:30<14:52,  1.37s/it]

preds:[0, 0, 0, 0, 0],label:[3, 1, 5, 5, 1]


epoch:0,batch:147,lr:0.001,loss:1.9359,mean_loss:2.38,mean_f1:0.064:  18%|██         | 147/800 [02:31<13:37,  1.25s/it]

preds:[0, 0, 0, 0, 0],label:[4, 3, 2, 1, 0]


epoch:0,batch:148,lr:0.001,loss:2.5479,mean_loss:2.381,mean_f1:0.064:  18%|█▊        | 148/800 [02:33<18:50,  1.73s/it]

preds:[0, 0, 0, 0, 0],label:[8, 0, 8, 2, 6]


epoch:0,batch:149,lr:0.001,loss:2.1457,mean_loss:2.379,mean_f1:0.065:  19%|█▊        | 149/800 [02:35<18:32,  1.71s/it]

preds:[0, 0, 0, 0, 0],label:[1, 1, 9, 1, 0]


epoch:0,batch:150,lr:0.001,loss:2.4074,mean_loss:2.379,mean_f1:0.064:  19%|█▉        | 150/800 [02:36<17:04,  1.58s/it]

preds:[0, 0, 0, 0, 0],label:[4, 4, 2, 3, 7]


epoch:0,batch:151,lr:0.001,loss:1.9484,mean_loss:2.376,mean_f1:0.064:  19%|█▉        | 151/800 [02:37<15:14,  1.41s/it]

preds:[0, 0, 0, 0, 0],label:[4, 1, 1, 1, 3]


epoch:0,batch:152,lr:0.001,loss:2.8475,mean_loss:2.38,mean_f1:0.064:  19%|██         | 152/800 [02:39<16:02,  1.49s/it]

preds:[0, 0, 0, 0, 0],label:[6, 9, 0, 4, 7]


epoch:0,batch:153,lr:0.001,loss:2.1853,mean_loss:2.378,mean_f1:0.064:  19%|█▉        | 153/800 [02:40<14:29,  1.34s/it]

preds:[0, 0, 0, 0, 0],label:[6, 5, 0, 1, 0]


epoch:0,batch:154,lr:0.001,loss:1.9209,mean_loss:2.375,mean_f1:0.065:  19%|█▉        | 154/800 [02:41<14:02,  1.30s/it]

preds:[0, 0, 0, 0, 0],label:[0, 1, 0, 1, 8]


epoch:0,batch:155,lr:0.001,loss:1.8294,mean_loss:2.372,mean_f1:0.065:  19%|█▉        | 155/800 [02:42<13:39,  1.27s/it]

preds:[0, 0, 0, 0, 0],label:[2, 1, 0, 1, 3]


epoch:0,batch:156,lr:0.001,loss:2.4993,mean_loss:2.373,mean_f1:0.066:  20%|█▉        | 156/800 [02:44<13:35,  1.27s/it]

preds:[0, 0, 0, 0, 0],label:[9, 0, 0, 0, 10]


epoch:0,batch:157,lr:0.001,loss:1.8793,mean_loss:2.369,mean_f1:0.069:  20%|█▉        | 157/800 [02:45<12:38,  1.18s/it]

preds:[0, 0, 0, 0, 0],label:[0, 7, 0, 0, 0]


epoch:0,batch:157,lr:0.001,loss:1.8793,mean_loss:2.369,mean_f1:0.069:  20%|█▉        | 157/800 [02:46<11:22,  1.06s/it]


RuntimeError: [enforce fail at C:\actions-runner\_work\pytorch\pytorch\builder\windows\pytorch\caffe2\serialize\inline_container.cc:319] . unexpected pos 3021440 vs 3021360

　　直接进行卷积对减少模型复杂度没有用，反而因为增加了卷积，让模型变得更大，更难以训练了。基于这样的情况，又回过头分析了一下，text的长度分布问题，发现绝大多数的text的长度都是在5000以下，所以前面思路中把每个text都扩充到59000多可能有些浪费，试试看把每个text控制在5000长度再试试。

In [1]:
import torch
import torch.nn as nn
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
import numpy as np
import pandas as pd
from tqdm import tqdm
import os

class MyDataset(Dataset):
    def __init__(self,csv_path):
        csv_data = pd.read_csv(csv_path,sep='\t')    
        self.text_data = csv_data.text
        self.label_data = csv_data.label
        
    def __getitem__(self,index):
        #所有text内的token索引增加1，0空出来代表空格，将每个text控制在1000长
        text_str = self.text_data[index]
        text_list = [int(x)+1 for x in text_str.split()]
        if len(text_list)>=1000:
            text_list = text_list[:1000]
        else:
            text_list.extend([0]*(1000-len(text_list)))
        text_array = np.array(text_list)
        label_array = np.array(self.label_data[index])
        return text_array,label_array
        
    def __len__(self):
        return len(self.text_data)

class MyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.embedding = nn.Embedding(7550,100)
        self.rnn = nn.GRU(100,50,batch_first=True)
        self.fc = nn.Linear(50,14)
        
    def forward(self,X):
#         print('X:',X.shape)
        X = self.embedding(X)
#         print('embedding:',X.shape)
        _,X = self.rnn(X)
#         print('rnn:',X.shape)
        X = X.squeeze(dim=0)
#         print('squeeze:',X.shape)
        y_hat = self.fc(X)
#         print('y_hat:',y_hat.shape)
        return y_hat

    
class MyTrain():
    def __init__(self,max_epoch=1,random_seed=1,lr=0.001,out_dir='./'):
        self.max_epoch = max_epoch
        self.random_seed = random_seed
        self.lr = lr
        self.out_dir = out_dir
        self.iter = 0
        
    def fix_random(self):
        import random
        import numpy as np
        import torch
        random.seed(self.random_seed)
        np.random.seed(self.random_seed)
        torch.random.manual_seed(self.random_seed)
        torch.cuda.random.manual_seed_all(self.random_seed)
        torch.backends.cudnn.deterministic = True
        print(f'random seed:{self.random_seed}')
        
    def my_train(self):
        
        max_epoch,lr = self.max_epoch,self.lr
        if self.random_seed is not None:
            self.fix_random()
        my_dataset = MyDataset('./train_set.csv')
        my_model = MyModel()
        my_model.train()          #将模型设置为训练模式
        my_optim = torch.optim.Adam(my_model.parameters(),lr=lr)
        my_loss = nn.CrossEntropyLoss()
        if torch.cuda.is_available():
            my_model.cuda()
            my_loss.cuda()
        print(f'train device:{next(iter(my_model.parameters())).device}')  #显示训练设备
        
        for epoch_index in range(max_epoch):
            loss_list = []
            f1_score_list = []
            best_f1_score = 0
            my_dataloader = DataLoader(my_dataset,batch_size=20,shuffle=True)
            my_dataloader = tqdm(my_dataloader)
            for batch_index,batch_data in enumerate(my_dataloader):
                batch_text,batch_label = batch_data
                print('batch_text:',batch_text.shape)
                print('batch_label:',batch_label)
                if torch.cuda.is_available():
                    batch_text = batch_text.cuda()
                    batch_label = batch_label.cuda()
                    
                y_hat = my_model(batch_text)
                batch_loss = my_loss(y_hat,batch_label)
                
                my_optim.zero_grad()
                batch_loss.backward()
#                 for i in my_model.parameters():
#                     print('grad:',i.grad)
                my_optim.step()
                my_optim.param_groups[0]['lr'] = lr*(0.8**((self.iter//1000)%10))
                
                #显示batch结果
                batch_lr = round(my_optim.param_groups[0]['lr'],5)
                batch_loss = round(batch_loss.item(),4)
                loss_list.append(batch_loss)
                mean_loss = round((sum(loss_list)/len(loss_list)),3)

                batch_f1_score = self.f1_score(y_hat.data,batch_label.data)
                f1_score_list.append(batch_f1_score)
                mean_f1 = round(sum(f1_score_list)/len(f1_score_list),3)
                my_dataloader.set_description(f'epoch:{epoch_index},batch:{batch_index},lr:{batch_lr},loss:{batch_loss},mean_loss:{mean_loss},mean_f1:{mean_f1}')
                
                #存储模型
                torch.save(my_model.state_dict(),os.path.join(self.out_dir,'embedding_gru_last'))                
                if batch_f1_score>best_f1_score:
                    torch.save(my_model.state_dict(),os.path.join(self.out_dir,'embedding_gru_best'))
                    best_f1_score = batch_f1_score
                    
                self.iter += 1
                
    def f1_score(self,y_hat,label,eps=1e-8):
        #y_hat(N,C),label(1)
        y_hat = y_hat.cpu()
        label = label.cpu()
        preds_list = list(torch.argmax(y_hat,dim=1).numpy())
        label_list = list(label.numpy())
#         print(f'preds:{preds_list},label:{label_list}')
        class_index_list = []
        for class_index in label_list:
            if class_index not in class_index_list:
                class_index_list.append(class_index)

        f1_score_list = []
        for index in class_index_list:
            if index not in preds_list:
                sub_f1_score = 0
            else:
                tp = 0
                fp = 0
                fn = 0
                for i in range(len(preds_list)):
                    if preds_list[i] == index and label_list[i] == index:
                        tp+=1
                    if preds_list[i] == index and label_list[i] != index: 
                        fp+=1
                    if preds_list[i] != index and label_list[i] == index:
                        fn+=1
                prec_val = tp/(tp+fp) 
                recall_val = tp/(tp+fn)
                sub_f1_score = 2*(prec_val*recall_val)/(prec_val+recall_val+eps)
            f1_score_list.append(sub_f1_score)

        batch_f1_score = sum(f1_score_list)/len(f1_score_list)

        return batch_f1_score
        
        
        
#     #验证所有的验证集数据
#     def my_valid(self,valid_dataloader,model,criterion):

#         #将模型设置为计算模型
#         model.eval()

#         total_loss = 0
#         with torch.no_grad():
#             for batch_index,valid_data in enumerate(valid_dataloader):
#                 batch_feat,batch_label = valid_data
#                 y_hat = model(batch_feat)
#                 loss = criterion(y_hat,batch_label)
#                 total_loss+=loss
#             total_loss/=(batch_index+1)

#         return total_loss

In [2]:
if __name__ == '__main__':
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    MyTrain(max_epoch=10,random_seed=None).my_train()

train device:cuda:0


epoch:0,batch:0,lr:0.001,loss:2.7137,mean_loss:2.714,mean_f1:0.0:   0%|                      | 0/10000 [00:00<?, ?it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 6,  1,  3,  0,  6,  2,  1,  8, 13,  1,  1,  1,  1,  4,  2,  8,  7,  9,
         0,  7])


epoch:0,batch:1,lr:0.001,loss:2.7193,mean_loss:2.716,mean_f1:0.0:   0%|              | 1/10000 [00:00<30:05,  5.54it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([7, 2, 4, 2, 0, 0, 2, 6, 0, 8, 1, 1, 1, 2, 1, 0, 8, 2, 1, 0])


epoch:0,batch:7,lr:0.001,loss:2.5346,mean_loss:2.593,mean_f1:0.044:   0%|            | 5/10000 [00:00<08:45, 19.02it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  1,  0,  3,  5,  1, 10,  0,  2,  1,  9,  5,  2,  0,  2,  1,  2,  1,
         3,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  1,  2,  0,  0,  3,  6,  2,  4, 10,  0,  4,  2,  1,  2,  7,  3,  1,
         0,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  8,  3,  1,  3,  7,  0,  9,  0,  0,  0,  3,  0, 12,  0,  3,  3,  3,
         1,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0, 12,  5,  5,  5,  1,  0,  0,  1,  0,  5,  2,  8,  3,  0,  2,  7,  1,
         1,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  1,  2,  8,  8,  2,  6,  7,  9,  0,  1,  1,  2,  3,  1,  5,  1,  7,
         0, 11])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 2, 2, 7, 1, 3, 2, 0, 3, 4, 0, 2, 2, 8, 3, 2, 0, 0, 0, 1])


epoch:0,batch:8,lr:0.001,loss:2.4176,mean_loss:2.574,mean_f1:0.054:   0%|            | 5/10000 [00:00<08:45, 19.02it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 2, 7, 6, 0, 1, 8, 9, 1, 2, 4, 2, 6, 1, 9, 3, 3, 1, 1, 1])


epoch:0,batch:15,lr:0.001,loss:2.288,mean_loss:2.491,mean_f1:0.051:   0%|           | 14/10000 [00:00<05:18, 31.34it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([5, 1, 1, 5, 8, 0, 3, 5, 1, 8, 0, 4, 3, 2, 6, 1, 7, 5, 1, 5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  7,  2,  1,  9, 11,  0, 11,  0,  7,  4,  5,  1,  8,  1,  2,  0,  0,
         5,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 1, 0, 8, 2, 8, 0, 3, 0, 5, 2, 0, 1, 7, 5, 3, 2, 0, 0, 8])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 9,  2,  1,  0,  1,  1, 11,  1,  3,  3,  3,  9,  9,  1,  2,  2,  4,  2,
         2,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  1,  2,  2,  0,  5,  8, 11,  0,  1,  0,  1,  3,  8,  8,  6,  2,  9,
         2,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([5, 1, 0, 0, 7, 4, 2, 3, 2, 2, 2, 1, 7, 0, 3, 3, 7, 0, 8, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1, 13,  0,  4,  3,  0,  2,  1,  1,  0,  1,  6,  0,  1,  0,  1,  0,  0,
         9,  0])


epoch:0,batch:16,lr:0.001,loss:2.467,mean_loss:2.49,mean_f1:0.052:   0%|            | 14/10000 [00:00<05:18, 31.34it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 0, 5, 5, 8, 4, 2, 0, 0, 9, 0, 2, 1, 2, 0, 7, 4, 6, 5, 4])


epoch:0,batch:22,lr:0.001,loss:2.1468,mean_loss:2.453,mean_f1:0.058:   0%|          | 22/10000 [00:00<04:47, 34.76it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 0, 2, 3, 4, 8, 2, 2, 0, 1, 8, 0, 0, 1, 0, 0, 6, 3, 2, 3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 2, 5, 0, 0, 1, 1, 2, 5, 7, 1, 2, 1, 2, 2, 5, 2, 2, 1, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  7,  0,  1, 10,  1,  3,  7,  2,  2,  3, 12,  2,  2,  7, 10,  1,  4,
         2,  8])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  3,  3,  0,  9,  0,  2,  4,  3,  1,  0,  6, 11,  6,  0,  3,  0,  0,
         4,  7])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  2,  2,  1,  1, 11,  2,  3,  7,  6,  7,  2,  4,  6,  3,  2,  3,  7,
        12,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 1, 8, 0, 3, 0, 0, 1, 3, 1, 1, 1, 1, 5, 6, 3, 0, 4, 0, 0])
batch_text: torch.Size([20, 1000])
batch_label: 

epoch:0,batch:23,lr:0.001,loss:2.5878,mean_loss:2.459,mean_f1:0.057:   0%|          | 22/10000 [00:00<04:47, 34.76it/s]

tensor([4, 7, 3, 1, 4, 5, 3, 1, 0, 8, 4, 8, 1, 2, 1, 3, 9, 4, 5, 0])
batch_text: torch.Size([20, 1000])
batch_label: 

epoch:0,batch:30,lr:0.001,loss:2.1454,mean_loss:2.442,mean_f1:0.059:   0%|          | 27/10000 [00:00<04:38, 35.81it/s]

tensor([ 5,  1,  4,  0, 13,  1,  2,  0,  3,  1,  1,  4,  3,  7,  3,  3,  5,  3,
         2, 11])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  0,  6,  1,  1,  1,  2,  4,  2,  2,  3,  2,  2,  9,  8,  4,  7, 11,
         4,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 3, 7, 8, 0, 1, 3, 5, 8, 3, 2, 8, 0, 8, 1, 5, 5, 0, 2, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 6,  3,  1,  0,  5, 10,  1,  5, 11,  1,  1,  2,  3,  4,  5,  9,  3,  3,
         1,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 0, 5, 5, 7, 3, 1, 6, 0, 3, 2, 1, 8, 1, 1, 0, 0, 6, 2, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  2,  2,  5,  2,  1,  5, 11,  0,  1,  1, 12,  1,  2,  7,  4, 11, 10,
         0,  5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 3, 3, 5, 3, 8, 1, 2, 2, 2, 1, 1, 2, 1, 9, 2, 1, 0, 0, 2])


epoch:0,batch:37,lr:0.001,loss:2.2698,mean_loss:2.458,mean_f1:0.059:   0%|          | 35/10000 [00:01<04:30, 36.84it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  3,  0,  1,  6,  0,  2,  4, 10,  1,  2,  7,  4,  9,  6,  0,  3,  7,
         0,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  9,  8,  3, 12, 10,  6,  2,  2,  6,  0,  0,  0,  1,  3,  1,  1,  2,
         3,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  3,  8,  1,  0, 12,  0,  8,  0,  5,  2,  1,  6, 11,  2, 10,  4,  0,
         1,  6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  0,  3,  3,  3,  1,  5, 10,  5,  7,  5,  0,  4,  7,  5,  6,  8,  1,
         7,  7])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  4,  2,  2,  0,  0,  3, 10,  2, 10,  0,  9,  8,  3,  4, 10,  6,  3,
         3,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  0,  0,  1,  2,  2,  1,  4,  1,  1,  2,  3,  2,  0,  4,  2,  2,  1,
         0, 10])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 0, 6, 1, 4, 1, 8, 3, 1, 0, 3, 1, 0, 4, 5, 4, 2, 1, 2, 1])
batch_text: t

epoch:0,batch:45,lr:0.001,loss:2.3097,mean_loss:2.447,mean_f1:0.062:   0%|          | 43/10000 [00:01<04:25, 37.51it/s]

tensor([1, 2, 4, 1, 3, 2, 8, 9, 7, 0, 8, 0, 5, 9, 7, 3, 5, 1, 6, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0, 12,  1,  1,  1,  7, 13,  0,  6,  4,  5,  4,  0,  4,  1,  0,  8,  0,
         2,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 6, 10,  0,  0,  6,  0,  1,  3,  0,  4,  0,  6,  2,  3,  3,  6,  0,  2,
         0,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 7, 9, 2, 9, 4, 0, 1, 2, 0, 4, 7, 0, 9, 0, 3, 1, 7, 0, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0, 10,  9,  2,  1,  1,  2,  2,  5,  8,  9,  6,  9,  3,  0,  3,  1,  1,
         8,  6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  0,  3,  0,  3,  4,  1,  6,  0,  0,  3,  2,  1,  7,  2,  4,  0,  3,
         1, 12])
batch_text: torch.Size([20, 1000])
batch_label: tensor([8, 2, 9, 2, 1, 0, 8, 1, 1, 1, 5, 3, 2, 5, 3, 3, 0, 1, 3, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  4,  2,  3,  5,  4,  8,  3, 11,  1,  1, 10,  8,  0,  0,  

epoch:0,batch:53,lr:0.001,loss:2.3127,mean_loss:2.43,mean_f1:0.07:   1%|            | 52/10000 [00:01<04:17, 38.59it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  3,  7,  5,  2,  9,  0,  9, 10, 11,  9,  2,  7,  0,  3,  2,  0,  2,
         2,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  8,  3,  0,  0,  0,  2,  2,  2,  0,  0,  7,  2,  2,  2,  1,  3,  0,
        10,  5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  3,  7,  2, 11,  3,  3,  7,  0, 10,  2,  3,  3,  3,  2,  2,  7, 11,
         6,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 1, 0, 0, 0, 0, 2, 2, 2, 0, 1, 2, 2, 4, 3, 4, 4, 2, 1, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  0,  3,  3,  9,  2,  4,  1,  7,  7,  4,  2,  3,  1, 11,  1,  0,  1,
         2,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0, 13,  2,  3,  3,  5,  8,  3,  2,  3,  2,  1,  7,  3,  2,  0,  4,  0,
         2,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 0, 3, 0, 0, 0, 6, 3, 7, 1, 2, 1, 3, 0, 2, 0, 6, 2, 6, 7])
batch_text: torch.Size([20, 1000])
batch_

epoch:0,batch:61,lr:0.001,loss:2.3759,mean_loss:2.418,mean_f1:0.067:   1%|          | 57/10000 [00:01<04:12, 39.36it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([7, 0, 9, 1, 3, 9, 3, 3, 3, 0, 8, 3, 0, 1, 2, 5, 8, 7, 6, 5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 1, 8, 1, 6, 1, 1, 3, 5, 3, 3, 1, 4, 2, 4, 1, 9, 4, 8, 7])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 1, 3, 7, 1, 3, 6, 1, 0, 4, 4, 6, 0, 1, 0, 6, 2, 0, 7, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  7,  1,  1,  3, 10,  2,  1, 10,  2,  4,  0,  0,  6,  0,  2,  2,  1,
         1,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 6, 0, 0, 0, 0, 8, 2, 3, 1, 2, 0, 7, 2, 1, 7, 1, 1, 5, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  5,  2,  3, 10,  0,  1,  6,  3,  0,  6,  7,  4,  3,  4,  9,  1,  0,
         4,  9])
batch_text: torch.Size([20, 1000])
batch_label: tensor([4, 5, 6, 2, 0, 2, 2, 5, 0, 0, 0, 0, 3, 4, 3, 1, 7, 2, 5, 3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 5,  1,  0,  2, 11,  3,  1,  1,  3, 12,  2,  2,  0,  3,  2,  6, 12,  

epoch:0,batch:69,lr:0.001,loss:2.1706,mean_loss:2.4,mean_f1:0.07:   1%|             | 67/10000 [00:01<04:04, 40.60it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  4, 10,  0,  0,  2,  3,  3,  2,  1,  3,  1,  2,  7, 10,  1,  1,  2,
         6,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([10,  4,  2,  2,  0,  4,  2,  8,  2,  1,  0,  1,  0,  8,  0,  2,  1,  7,
         0,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 6,  9,  2,  0, 10,  5,  5,  2,  0,  0,  4,  3,  1,  1,  9,  7,  3,  0,
         0,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 2, 7, 1, 8, 3, 1, 0, 5, 1, 7, 0, 9, 4, 6, 3, 4, 0, 3, 6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0, 11,  2,  0,  8,  0,  0,  2,  2,  0,  1,  3,  7,  0,  1,  0,  3,  9,
         3,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 8, 10,  8,  5,  6,  3,  7,  2,  1,  9,  0,  1,  2,  1,  3,  3,  2,  1,
         0,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([6, 3, 1, 5, 0, 2, 0, 0, 1, 1, 3, 0, 6, 1, 3, 2, 8, 0, 0, 4])
batch_text: torch.Size([20, 1000])
batch_

epoch:0,batch:76,lr:0.001,loss:2.3072,mean_loss:2.393,mean_f1:0.069:   1%|          | 77/10000 [00:02<04:12, 39.30it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4, 10, 11,  0,  2,  1, 11,  2,  1,  2,  2,  6,  2,  9,  0,  2,  0,  0,
         0,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([4, 3, 6, 5, 6, 1, 1, 1, 3, 0, 8, 7, 0, 0, 2, 8, 2, 6, 8, 6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  2,  0,  3,  5,  2,  3,  2,  8,  0,  0,  8,  4,  0,  2, 10,  1,  7,
         5,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 3, 7, 6, 0, 2, 1, 0, 4, 0, 6, 1, 2, 5, 1, 9, 0, 1, 1, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([5, 4, 1, 2, 2, 6, 1, 7, 3, 0, 1, 3, 5, 2, 9, 0, 6, 3, 1, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 0, 2, 2, 1, 3, 2, 1, 4, 0, 0, 9, 1, 1, 1, 4, 8, 2, 1, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  0,  2,  8,  3,  1,  9, 12,  0,  6,  3,  0,  1,  3,  1,  2,  1,  2,
        13, 11])
batch_text: torch.Size([20, 1000])
batch_label: 

epoch:0,batch:84,lr:0.001,loss:2.1813,mean_loss:2.391,mean_f1:0.069:   1%|          | 82/10000 [00:02<04:06, 40.20it/s]

tensor([ 3,  4,  1,  2,  1,  0,  6,  1,  5,  0,  5,  5,  1, 10,  0,  0,  2,  2,
         5,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  2,  0,  5,  3,  2,  7,  2,  2,  0, 10,  4,  0,  7,  3,  1,  0,  4,
        10,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 9, 12,  1,  1,  2,  2,  8, 11, 11,  4,  7,  5,  3,  5,  0,  1,  4,  1,
         0,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 0, 6, 8, 2, 4, 4, 2, 9, 7, 1, 1, 3, 1, 1, 7, 3, 2, 2, 5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 0, 0, 3, 9, 7, 2, 1, 4, 0, 5, 8, 2, 7, 1, 1, 3, 1, 2, 4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  2,  6,  3,  1,  2,  3,  1,  1,  7,  2,  2, 10,  1,  3,  1,  0,  6,
        10,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  1,  4,  0, 10,  1,  3,  0,  5, 10,  0,  0,  1,  1,  7,  3,  2,  1,
         0,  8])
batch_text: torch.Size([20, 1000])
batch_label: tensor([4, 0, 4, 0, 2, 1, 2, 0, 0, 1, 5, 

epoch:0,batch:92,lr:0.001,loss:2.3334,mean_loss:2.384,mean_f1:0.068:   1%|          | 92/10000 [00:02<04:03, 40.77it/s]

tensor([ 4,  1,  0,  4,  2,  3,  0,  1,  4,  2,  4,  8,  9,  4,  1,  2,  1,  1,
         1, 10])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 7,  0,  4,  3,  2,  1,  0,  4, 11,  2,  7,  6,  2,  3,  1,  4, 10,  7,
         3,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  4,  6,  9,  3,  1,  1,  1,  0,  4,  2, 11,  0,  2,  2,  5,  0,  9,
         3,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  1,  6,  2,  3,  2,  0,  6,  1,  3,  9,  4, 12,  2,  7,  1,  7,  2,
         2,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 8,  7,  3,  4,  1,  7,  0,  1,  2,  5,  2, 11,  2,  0,  5, 10,  1,  3,
         1,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 7, 7, 2, 6, 2, 1, 8, 4, 0, 2, 1, 0, 3, 6, 2, 2, 8, 1, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  4,  3,  1,  2,  7,  2,  2,  2,  0,  7,  1,  7,  4,  0,  2,  8,  4,
        10,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  

epoch:0,batch:100,lr:0.001,loss:2.4654,mean_loss:2.375,mean_f1:0.068:   1%|         | 97/10000 [00:02<04:01, 41.07it/s]

tensor([ 1,  2,  4,  0,  0, 11,  4,  1,  3,  2,  1,  2,  0,  7,  1,  1,  9,  3,
         8,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  5,  1,  5,  5, 11,  0,  1,  3,  0,  1,  1,  4,  2,  1,  2,  0,  1,
         2,  5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  0,  5,  0,  4,  1,  3,  4, 10,  1,  1,  0,  3,  0,  3,  1, 10,  4,
         3,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([4, 2, 0, 2, 5, 2, 0, 0, 6, 3, 1, 6, 1, 0, 4, 5, 3, 0, 0, 6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 3, 1, 4, 0, 3, 2, 0, 3, 2, 3, 4, 2, 2, 0, 2, 0, 0, 2, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2, 10, 11,  5,  7,  1,  4,  0,  1,  2,  6,  2,  2,  2, 12,  8,  3,  9,
         1,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 7,  1,  1,  4,  4, 10,  5,  1,  5,  6,  2,  2,  0,  1,  9,  2,  1,  4,
         0,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  5, 13,  1,  3,  0,  0,  7,  

epoch:0,batch:108,lr:0.001,loss:2.1951,mean_loss:2.369,mean_f1:0.068:   1%|        | 107/10000 [00:02<03:57, 41.70it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 4, 7, 2, 2, 0, 1, 7, 5, 1, 8, 2, 2, 1, 5, 9, 8, 2, 2, 5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  7, 11,  0,  5,  7,  9,  2,  1,  0,  0,  7,  1,  7,  9,  4,  6,  9,
         2,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 2, 3, 4, 4, 6, 8, 9, 0, 0, 0, 1, 0, 1, 1, 3, 3, 2, 0, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 1, 2, 1, 6, 3, 3, 3, 0, 6, 9, 7, 1, 6, 3, 0, 4, 1, 4, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  0, 11,  7,  1,  0,  5,  2,  1,  4,  5,  9,  0,  3,  4,  2,  0,  8,
         1,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([4, 0, 2, 2, 2, 2, 9, 5, 2, 8, 8, 3, 5, 1, 2, 0, 2, 7, 0, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 6, 9, 0, 2, 1, 2, 1, 2, 1, 2, 3, 2, 3, 4, 0, 3, 4, 2, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 3, 7, 0, 8, 2, 7, 8, 2, 0, 2, 0, 6, 2, 6, 0, 2, 2, 0, 1])
batch_te

epoch:0,batch:117,lr:0.001,loss:2.2296,mean_loss:2.36,mean_f1:0.066:   1%|         | 117/10000 [00:03<04:02, 40.78it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  1,  2,  0,  0,  2,  1,  9,  3,  0,  2,  3,  0,  2,  5,  2, 11,  4,
         2,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 1, 3, 0, 1, 0, 6, 3, 4, 3, 3, 1, 5, 4, 2, 2, 1, 4, 1, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 0, 1, 0, 4, 5, 1, 3, 2, 9, 2, 4, 0, 1, 0, 3, 1, 0, 4, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  2,  3,  1,  7,  1,  7,  0,  0,  1, 12,  3,  2,  2,  0,  2,  3,  0,
         0,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 0, 1, 7, 1, 1, 4, 8, 2, 1, 8, 1, 7, 1, 4, 0, 1, 3, 3, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 5,  1,  1,  0,  3,  3,  8,  5,  6,  8,  1,  2, 11,  3,  3,  4,  8,  3,
         3,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  2,  1,  2,  8, 10,  0,  6,  1,  2,  3, 11,  8,  6,  3,  8,  6,  1,
         3,  7])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 5,  2,  8,  

epoch:0,batch:125,lr:0.001,loss:2.1607,mean_loss:2.358,mean_f1:0.066:   1%|        | 122/10000 [00:03<04:03, 40.53it/s]

tensor([10,  7,  1,  5,  4,  2, 12,  2,  0,  0,  0,  5,  1,  0,  8,  5,  0,  7,
         2,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 6,  5,  1,  1,  4,  1,  2,  4,  7,  2,  2,  3,  0,  2,  7, 11,  0,  3,
         1,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 7,  0,  0,  9,  1,  2,  1,  1,  1,  5,  1,  0,  0,  4,  1,  5,  3, 11,
         3,  6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  4,  0,  5,  0,  0,  1,  4,  6,  1,  2,  0,  5,  3,  2,  0,  1,  1,
        10,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  1, 11,  7,  1,  1,  2,  3,  6,  0,  0,  9,  1,  8,  3, 13,  6,  0,
         0,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([11,  1,  1,  1,  0,  0,  6, 10,  3,  3,  3,  1,  2, 10,  1,  7,  4,  1,
         0,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([11,  0,  8,  3,  2,  2,  2,  6,  3,  0,  0,  1,  1,  0,  0,  8,  2,  1,
         2,  6])
batch_text: torch.Size([20, 1000]

epoch:0,batch:133,lr:0.001,loss:2.3184,mean_loss:2.344,mean_f1:0.067:   1%|        | 132/10000 [00:03<04:03, 40.49it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  0, 13,  0,  4,  4,  2,  0,  2,  9,  3, 11,  1,  1,  0,  2,  3,  6,
         0,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 2, 6, 4, 3, 1, 1, 2, 1, 1, 1, 0, 3, 2, 1, 3, 3, 1, 8, 4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  2, 10,  3,  2,  0,  0,  6,  3,  3,  0,  7,  1,  1,  0,  5,  1,  0,
         5,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 2, 1, 0, 2, 3, 2, 1, 1, 3, 6, 8, 1, 9, 3, 1, 2, 4, 1, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 0, 0, 2, 7, 3, 2, 0, 0, 6, 0, 0, 2, 2, 1, 4, 4, 5, 7, 5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 2, 0, 1, 3, 0, 2, 0, 5, 0, 5, 8, 6, 0, 0, 2, 3, 3, 2, 3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([10,  1,  2,  2,  0,  8,  3,  1,  5,  3,  1,  0, 10,  2,  1,  1,  3,  1,
         2,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([10,  3,  0,  9,  4,  3,  7,  1,  2,  3,  

epoch:0,batch:141,lr:0.001,loss:2.2253,mean_loss:2.337,mean_f1:0.069:   1%|        | 137/10000 [00:03<04:05, 40.18it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  0,  0,  9,  1,  3,  1,  1,  5,  4,  1,  3,  3,  4,  5, 10,  8,  1,
         2,  5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0, 13, 11,  5,  1,  1,  2,  1,  1,  6,  0,  2,  2,  1,  1, 12,  1,  0,
         1,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  4,  0,  8,  7,  3,  7,  0,  5,  5,  3,  2,  2,  3,  4,  0,  4,  2,
         5, 11])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  1,  2,  3,  1,  0,  6,  9,  3,  7,  1, 12,  1,  1,  3,  0,  2,  5,
         1,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  2,  0,  5, 13,  1,  1,  2,  6,  7,  5,  3,  2,  0,  2,  3,  5,  3,
         0,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 9,  2,  3,  1,  7,  5,  4,  2,  0,  2,  5,  2,  3,  4,  0,  2, 10,  0,
         1,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 0, 2, 0, 3, 1, 8, 2, 1, 3, 0, 1, 1, 2, 5, 3, 2, 0, 1, 2])
batch_text: t

epoch:0,batch:149,lr:0.001,loss:2.3057,mean_loss:2.337,mean_f1:0.069:   1%|        | 147/10000 [00:03<04:03, 40.49it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 7, 9, 7, 0, 3, 0, 0, 1, 1, 2, 4, 2, 1, 1, 1, 3, 7, 2, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 7,  0,  7,  0,  8,  4,  6,  2,  4,  7,  1,  2,  1,  0,  5,  2,  0,  3,
        11,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 6,  1,  0,  2,  5,  2,  1,  2,  1,  0,  4,  0, 10,  4,  0,  4,  1,  1,
         4,  9])
batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 3, 5, 6, 1, 4, 2, 2, 2, 1, 5, 3, 0, 2, 1, 1, 9, 2, 0, 4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  6,  9,  2,  8,  0,  0,  3,  1,  0, 11,  1,  2,  4,  6,  3,  5,  1,
         0,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  5, 10, 11,  2,  4,  6,  1,  2,  1, 13,  3,  3,  5,  0, 12,  6,  1,
         0,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  6,  3,  2,  7,  6,  6,  0,  2,  9, 10,  5,  0,  0,  4,  0,  3,  6,
         1,  0])
batch_text: torch.Size([20, 1000])
batch_

epoch:0,batch:157,lr:0.001,loss:2.2886,mean_loss:2.331,mean_f1:0.07:   2%|▏        | 157/10000 [00:04<04:00, 41.00it/s]

tensor([1, 5, 4, 2, 1, 0, 3, 3, 3, 2, 7, 1, 2, 7, 0, 1, 2, 0, 3, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  4,  5,  2, 10,  8,  0,  0,  0,  1,  1, 10,  1,  3,  2,  2,  0,  4,
         1,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3, 10, 10,  9,  0,  2,  0,  1,  4,  1, 12,  0,  3,  0,  3,  0,  1,  6,
         3,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  1,  3,  2,  0,  3,  1,  6,  0,  1,  6,  8,  2,  2,  1,  1, 12,  0,
         7,  8])
batch_text: torch.Size([20, 1000])
batch_label: tensor([4, 6, 2, 0, 8, 2, 0, 0, 0, 5, 1, 2, 2, 4, 1, 2, 0, 1, 8, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([6, 7, 0, 2, 1, 1, 0, 2, 6, 3, 0, 8, 1, 1, 3, 8, 2, 5, 2, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0, 10,  5,  5,  2, 10,  8,  2,  2,  3,  1,  5,  1,  1,  0,  0,  5,  2,
         1,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  1,  1,  5,  2, 10,  4, 10,  1,  2,  1,  5,  4,  0,  2,  

epoch:0,batch:164,lr:0.001,loss:1.9962,mean_loss:2.33,mean_f1:0.07:   2%|▏         | 162/10000 [00:04<04:03, 40.48it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 7,  2,  1,  3,  3,  5,  0,  0,  8,  9,  1,  3,  5, 11,  9,  8,  5,  2,
         6,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 7, 8, 2, 8, 0, 3, 3, 3, 3, 2, 0, 4, 4, 6, 0, 1, 2, 0, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3, 11,  1, 12,  0,  4,  6,  4,  1,  2, 13,  1,  1,  0,  0,  0,  1,  0,
         0,  5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 9, 3, 5, 1, 2, 1, 3, 2, 0, 8, 1, 9, 3, 6, 4, 0, 3, 8, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 5, 2, 8, 5, 1, 8, 7, 1, 1, 3, 9, 5, 1, 2, 0, 0, 0, 0, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  3,  3,  0,  9,  2, 12,  8,  0,  5,  1,  4, 11,  1,  1,  1,  3,  3,
         6,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 6,  3,  0,  2,  3,  1,  1,  2,  2,  0,  3,  3, 13,  0,  2,  1,  2,  1,
         2,  2])
batch_text: torch.Size([20, 1000])
batch_label: 

epoch:0,batch:172,lr:0.001,loss:2.1504,mean_loss:2.328,mean_f1:0.071:   2%|▏       | 172/10000 [00:04<04:01, 40.66it/s]

tensor([ 0,  4,  0,  1,  0,  5,  1,  2,  2,  1,  2,  6,  0,  7,  7,  0,  7, 12,
        10,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  2,  6,  3, 11, 11,  1,  0,  2,  1,  6,  3,  6,  0,  5,  0,  2,  1,
         3,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 0, 6, 3, 2, 0, 7, 4, 1, 2, 7, 2, 8, 1, 3, 1, 1, 3, 2, 3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  8,  4,  0,  1,  2,  2,  8,  0,  8,  3,  2,  3, 10,  0,  1,  1,  7,
         3,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  2,  2,  2,  1,  2,  2,  6,  0,  9,  0,  4, 12,  6,  3,  7,  1,  6,
         5,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  1,  1,  0,  3,  2,  0,  7,  3,  1,  0, 10,  1,  6,  5,  2,  1,  6,
         2,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  4, 12,  2,  3, 11,  1,  3,  8,  1,  4,  5,  1, 12,  2, 10,  5,  0,
         1,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  

epoch:0,batch:180,lr:0.001,loss:2.1144,mean_loss:2.325,mean_f1:0.071:   2%|▏       | 177/10000 [00:04<04:02, 40.55it/s]

tensor([5, 9, 0, 9, 1, 2, 2, 8, 0, 0, 1, 3, 3, 1, 1, 4, 3, 7, 2, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  8,  5,  9,  1,  0,  3,  2,  3,  2,  2, 10,  1,  4,  0,  7,  2,  1,
         2,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4, 10,  0,  4,  0,  6,  0,  0,  8,  0,  2,  3,  7,  5,  0,  1,  6,  1,
         4, 12])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 7,  2, 10,  9,  0,  2,  1,  3,  4,  2,  1,  1,  0,  1, 12,  2,  6,  0,
         1, 10])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  0,  9, 10,  5,  1,  0,  0,  1,  0,  6,  3,  4,  1,  9,  3,  6,  0,
         0,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 8,  3,  3,  3,  1,  0,  2,  1,  0,  3,  0,  6,  0,  0,  7,  1,  4,  1,
         7, 11])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3, 11,  0,  5,  5,  7,  6,  0,  2,  3,  1,  0,  1,  0,  1,  6,  6,  0,
         8,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  

epoch:0,batch:188,lr:0.001,loss:2.2366,mean_loss:2.32,mean_f1:0.071:   2%|▏        | 187/10000 [00:04<04:02, 40.44it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 5,  1,  7,  2,  1,  2,  7,  0,  4, 11,  5, 13,  1,  4,  5,  0,  1,  0,
         3,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  3,  6,  1,  0,  1,  3,  0,  3,  0,  3, 12,  3,  3,  6,  5,  2,  6,
         3,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  1,  7,  9,  4, 10,  1,  0,  8,  4,  2,  2,  1,  2,  4,  0,  1,  1,
         4,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  2,  4,  3,  4,  0,  0,  3,  7,  4,  0,  3,  0,  4,  4,  7, 11,  3,
         8,  5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 0, 0, 0, 1, 0, 2, 4, 1, 3, 1, 3, 2, 2, 1, 8, 0, 1, 3, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 3, 2, 2, 0, 7, 2, 1, 0, 4, 5, 0, 5, 1, 1, 1, 1, 0, 7, 9])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2, 10,  3,  3,  0,  3,  2,  0,  2,  1,  0,  2,  6,  4,  2,  0,  7,  0,
         7,  3])
batch_text: torch.Size([20, 1000])
batch_

epoch:0,batch:196,lr:0.001,loss:2.1857,mean_loss:2.316,mean_f1:0.071:   2%|▏       | 192/10000 [00:05<04:01, 40.53it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  1,  2,  0,  5,  0,  2,  8,  3,  7,  1,  3, 10,  3,  2,  5,  1,  2,
         0,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  0,  7,  1,  0,  4,  1,  2,  3,  6, 11,  2,  1,  1,  4,  2,  6,  6,
         0,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([11,  4,  1,  0,  4,  2,  1,  3,  1,  2,  1,  7, 11,  6,  1,  5, 11,  0,
         5,  5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 4, 6, 0, 3, 9, 4, 2, 1, 5, 1, 3, 0, 0, 1, 0, 1, 0, 2, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 8,  2,  1,  3,  8,  1,  2,  7,  5,  5,  0,  3,  5,  6,  1, 12,  0,  0,
         2,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([7, 6, 2, 5, 3, 3, 0, 3, 1, 1, 3, 4, 2, 0, 2, 2, 0, 4, 0, 9])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  0,  1, 11,  9,  1,  1,  4,  2,  1,  0,  3,  3, 10,  2,  2,  1,  3,
         3,  1])
batch_text: torch.Size([20, 1000])
batch_

epoch:0,batch:204,lr:0.001,loss:2.3542,mean_loss:2.316,mean_f1:0.071:   2%|▏       | 202/10000 [00:05<04:02, 40.33it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 1, 0, 1, 5, 5, 3, 0, 0, 2, 2, 9, 0, 0, 5, 4, 2, 5, 0, 3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 0, 1, 1, 1, 5, 0, 8, 0, 8, 1, 2, 1, 1, 3, 3, 2, 0, 2, 4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  6, 12,  2,  2,  7,  2,  5,  2,  1,  7,  0,  1,  0,  9,  7,  2,  8,
         8,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 6,  8,  9,  6,  1,  2,  3,  1,  6,  1,  1, 11,  4,  3,  0,  1,  2,  4,
         9,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 8,  5,  0, 10,  5,  1,  7,  4,  3,  1,  3,  7,  5, 10,  7,  1,  4,  8,
         5,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 0, 2, 3, 0, 5, 0, 0, 1, 2, 4, 2, 0, 0, 2, 3, 3, 4, 4, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  0,  2,  5,  3,  3,  5,  0,  2,  0,  3,  8,  8,  0, 10,  3,  1,  1,
         5,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 7,  0,  1,  

epoch:0,batch:212,lr:0.001,loss:2.1991,mean_loss:2.314,mean_f1:0.071:   2%|▏       | 212/10000 [00:05<04:04, 39.95it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([4, 6, 1, 6, 1, 1, 4, 5, 3, 2, 0, 3, 2, 1, 6, 5, 2, 6, 5, 3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 3, 3, 3, 7, 6, 5, 2, 2, 2, 1, 0, 0, 2, 1, 4, 2, 1, 4, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 7,  2,  2,  3,  4,  3, 10,  6,  4,  1,  2,  0,  9,  1,  1,  0,  5,  6,
         2,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 3, 3, 5, 0, 3, 4, 0, 0, 5, 1, 7, 1, 1, 3, 1, 7, 0, 1, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 9,  3, 11,  6,  2,  5,  0,  4,  3,  1,  1,  3,  0, 12,  2,  0, 10,  2,
         6,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  1,  2,  6,  5,  2,  7,  0,  7,  3,  6,  1,  4,  5,  0, 10,  8, 10,
         0,  6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  8,  4, 10,  2,  6,  0,  1,  1,  0,  0,  3,  0, 10,  2,  2,  0,  0,
         5,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  2,  2,  

epoch:0,batch:220,lr:0.001,loss:2.4534,mean_loss:2.314,mean_f1:0.071:   2%|▏       | 217/10000 [00:05<04:01, 40.51it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 7,  0,  0,  9,  1,  1,  7,  1,  2,  9,  1,  9,  0, 11,  1,  2,  3,  0,
         5,  9])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 5,  8,  5,  9,  7,  1,  0,  2,  2,  1,  9,  1,  8, 12,  1,  1,  6,  8,
         1,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  6,  3,  1,  2,  4,  3,  1, 10, 11,  1,  2,  0,  4,  4,  1,  5, 10,
         3,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  6,  0,  2,  6,  1,  1,  4, 10, 11,  0,  3,  1,  1, 10,  9,  0,  7,
         5,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 4, 1, 4, 5, 3, 0, 0, 2, 7, 3, 0, 0, 2, 1, 3, 6, 9, 1, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 7,  2,  0, 10,  3,  2,  4,  1,  1,  1,  2,  2,  2,  0,  3,  2,  1,  2,
         1,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([7, 1, 2, 6, 2, 0, 0, 1, 2, 2, 1, 0, 2, 8, 0, 2, 2, 2, 2, 2])
batch_text: torch.Size([20, 1000])
batch_

epoch:0,batch:227,lr:0.001,loss:2.1661,mean_loss:2.312,mean_f1:0.07:   2%|▏        | 227/10000 [00:05<04:00, 40.67it/s]

tensor([7, 5, 3, 1, 0, 6, 1, 2, 8, 3, 0, 4, 0, 2, 3, 3, 5, 4, 0, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([6, 2, 2, 1, 1, 8, 0, 3, 6, 1, 8, 1, 5, 2, 1, 1, 9, 7, 4, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 3, 0, 2, 4, 1, 7, 4, 0, 4, 2, 3, 4, 2, 0, 4, 5, 7, 1, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([8, 2, 1, 1, 0, 7, 6, 0, 5, 0, 3, 7, 6, 1, 4, 0, 0, 0, 6, 5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  0,  7,  9,  1,  0,  8,  1,  3,  0,  6, 11,  1,  4,  2,  0,  1,  7,
         0, 13])
batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 2, 1, 3, 1, 1, 0, 3, 2, 7, 8, 0, 0, 2, 2, 1, 5, 4, 0, 6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 0, 6, 6, 3, 0, 4, 1, 2, 8, 1, 4, 0, 7, 2, 2, 2, 0, 0, 7])
batch_text: torch.Size([20, 1000])
batch_label: 

epoch:0,batch:235,lr:0.001,loss:2.1626,mean_loss:2.309,mean_f1:0.07:   2%|▏        | 232/10000 [00:06<04:00, 40.58it/s]

tensor([7, 0, 9, 1, 4, 2, 5, 2, 0, 3, 3, 0, 0, 0, 1, 8, 1, 0, 3, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([8, 2, 3, 5, 0, 6, 6, 4, 2, 5, 3, 2, 9, 2, 0, 2, 1, 4, 2, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  8,  4,  6, 11,  1,  5,  3,  0, 11,  8,  7,  0,  0,  1,  3,  3,  2,
         7,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 7,  8,  9,  0,  1,  3, 10,  0, 11,  1,  0,  8,  1,  3, 10,  3,  1,  1,
         3,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  0,  1,  1,  4,  4,  0,  1,  1,  4,  1,  0,  0,  1,  2,  9,  4,  4,
         2, 11])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 1, 7, 2, 7, 2, 3, 0, 3, 5, 3, 3, 6, 2, 1, 1, 1, 0, 3, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  6,  7,  0,  0, 12,  1,  2,  3,  2,  1,  1,  4,  3,  5,  0,  0,  7,
         3,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 4, 2, 1, 1, 6, 0, 1, 4, 2, 9, 0, 3, 1, 4, 8, 0, 7, 0, 0])


epoch:0,batch:242,lr:0.001,loss:2.1992,mean_loss:2.305,mean_f1:0.07:   2%|▏        | 242/10000 [00:06<04:05, 39.81it/s]

tensor([ 1,  6,  9,  4,  3,  1,  4,  2,  3,  0,  1,  0,  2,  2,  1,  0,  4,  2,
         0, 10])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  1,  1,  3,  1,  5,  2,  4,  3, 12,  2,  1,  2, 11,  2, 11,  0,  4,
         2,  5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([4, 4, 4, 4, 0, 1, 2, 0, 0, 0, 2, 0, 3, 0, 9, 2, 0, 0, 2, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  2,  4,  0,  0,  5,  2,  1,  4,  1, 13,  3,  4,  0,  3,  0,  2,  0,
         0,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([6, 1, 7, 9, 4, 0, 0, 6, 2, 3, 1, 1, 0, 7, 0, 1, 0, 0, 0, 7])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 0, 2, 0, 1, 5, 1, 4, 0, 1, 1, 9, 1, 1, 6, 9, 4, 4, 1, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  5, 12,  0,  0,  0,  2,  2,  9,  1,  2,  2,  3,  2,  3,  3,  3,  1,
         5,  0])
batch_text: torch.Size([20, 1000])
batch_label: 

epoch:0,batch:250,lr:0.001,loss:2.3596,mean_loss:2.302,mean_f1:0.07:   2%|▏        | 247/10000 [00:06<04:02, 40.21it/s]

tensor([ 0,  4,  0,  1,  0,  4,  0,  1,  2,  1,  1,  6,  6, 12,  2,  7,  2, 10,
         4,  7])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 1, 0, 4, 2, 9, 1, 2, 1, 0, 5, 1, 4, 7, 2, 1, 9, 2, 2, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 1, 1, 3, 1, 1, 0, 0, 1, 1, 2, 0, 5, 0, 1, 1, 1, 2, 2, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 5,  2, 11,  6,  1, 10,  2,  5,  1,  5,  1,  4,  5,  0,  5,  6,  9,  6,
         3,  8])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 0, 3, 0, 0, 1, 0, 1, 3, 6, 2, 3, 0, 2, 8, 0, 1, 1, 6, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  4,  0,  2, 12,  9,  0,  3,  1,  6, 11,  5,  0,  2,  3, 12,  2,  0,
         3,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 8,  2,  0,  0,  2,  6,  0,  8,  0,  0,  1,  3,  0,  0,  2, 11,  2,  9,
         2,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  1,  2,  3,  3,  2, 10,  2, 11,  4,  3,  0,  4,  0,  3,  

epoch:0,batch:257,lr:0.001,loss:2.286,mean_loss:2.304,mean_f1:0.07:   3%|▎         | 256/10000 [00:06<04:05, 39.68it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 7,  4,  0, 10,  2,  4, 11,  0,  2,  7,  9,  1,  2,  9,  0,  6,  3,  2,
         7,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([11,  1,  1,  1,  0,  5,  1,  2,  0,  2,  6,  0,  0,  0,  1,  0,  8,  3,
         2,  6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([6, 0, 3, 8, 5, 6, 1, 3, 5, 6, 3, 2, 4, 5, 2, 4, 1, 1, 2, 6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  1,  5,  0,  2,  2,  1,  8,  0, 13,  0,  1,  5,  6,  1,  0,  2,  3,
         6,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([4, 0, 1, 0, 7, 3, 6, 3, 2, 3, 3, 1, 0, 4, 6, 0, 8, 0, 0, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([10,  9,  6,  9,  2, 13,  1,  4,  1,  1,  1,  9,  6, 13,  1,  0,  1,  0,
         4,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  0,  5, 11,  8, 11,  1,  0,  0,  0,  7,  0,  0,  2,  0,  5,  3,  4,
         0,  9])
batch_text: torch.Size([20, 1000])
batch_

epoch:0,batch:265,lr:0.001,loss:2.1768,mean_loss:2.305,mean_f1:0.07:   3%|▏        | 261/10000 [00:06<04:02, 40.16it/s]

tensor([ 2,  7,  6,  2,  3,  0,  7,  1, 10,  0, 12,  5,  4,  1,  2,  2,  1, 12,
         0,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([11,  2,  3,  2,  2,  0,  0,  4, 12,  3, 10,  1,  6,  1,  2,  2,  9,  6,
         0,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  1,  2,  9,  5,  2, 10,  6,  3,  0,  3, 10,  0, 10,  4,  0,  3,  0,
         1,  7])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 0, 8, 0, 5, 4, 7, 7, 5, 1, 1, 3, 2, 1, 1, 3, 1, 3, 0, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  4,  7,  3,  8,  1,  0,  0,  0, 10,  0,  1,  9,  0,  3,  0,  5,  2,
         1,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([11,  2,  2,  9,  4,  4,  1,  4,  0,  2,  3,  1,  4,  5,  0, 11,  1,  1,
         0, 10])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  5,  2,  7,  3,  7, 13,  2,  0, 11,  6, 12,  8,  0,  2,  0,  6,  7,
         1,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  

epoch:0,batch:273,lr:0.001,loss:2.307,mean_loss:2.305,mean_f1:0.071:   3%|▏        | 271/10000 [00:07<04:03, 39.94it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([4, 2, 2, 0, 0, 2, 0, 2, 4, 0, 0, 0, 1, 7, 7, 3, 2, 5, 7, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 5,  0,  0,  2,  7,  9,  4,  2,  1,  5,  1,  8, 11,  2,  5,  7,  8, 10,
         1,  7])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 8,  6, 10,  9,  8,  7,  2,  2,  0,  1,  6,  2,  4,  6,  2,  2,  0,  1,
         0,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 4, 2, 1, 0, 2, 4, 4, 8, 3, 7, 1, 2, 2, 0, 2, 5, 2, 6, 4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([10,  6, 10,  7,  0,  6,  3,  8,  0,  1,  2,  1,  0,  1,  0,  0,  1,  2,
         4,  5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([12,  2,  0,  2,  4,  0,  8,  1,  8,  0,  0,  0,  2,  7,  1,  0,  0,  2,
         4,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  5,  3,  0,  1,  1,  1,  3,  3,  2,  3,  1,  4,  5, 10,  0,  0, 10,
         1,  7])
batch_text: torch.Size([20, 1000])
batch_

epoch:0,batch:281,lr:0.001,loss:2.34,mean_loss:2.304,mean_f1:0.071:   3%|▎         | 281/10000 [00:07<04:01, 40.22it/s]

tensor([1, 0, 1, 3, 8, 1, 4, 7, 3, 1, 1, 1, 0, 1, 4, 2, 0, 0, 3, 5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  9,  4,  4,  1,  3,  2,  2,  3,  2, 12,  2,  4,  4,  0,  0,  8,  2,
         9,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0, 11,  1,  4,  4,  1,  0,  4,  1,  7,  5,  0,  2,  2,  1,  7,  4,  5,
         3,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 8, 2, 2, 0, 4, 1, 4, 0, 3, 6, 0, 5, 0, 0, 6, 0, 4, 1, 5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  1,  0,  0,  1,  1,  0,  1,  5,  0,  2, 10,  0,  0,  5,  2,  8,  7,
         7,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  3, 10,  0,  8,  1,  6,  1,  9,  1,  5,  0,  5,  3,  7,  0,  0,  3,
         6,  6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  2,  1,  2,  2,  5,  7,  2,  2,  1,  1,  4,  4,  2,  9,  1, 12,  5,
         5,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  3,  4,  0,  9,  1, 11,  2,  

epoch:0,batch:289,lr:0.001,loss:2.3383,mean_loss:2.302,mean_f1:0.071:   3%|▏       | 286/10000 [00:07<04:00, 40.32it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 3, 1, 0, 3, 3, 7, 6, 1, 5, 7, 2, 5, 0, 2, 3, 4, 1, 1, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0, 10,  3,  4,  5,  2,  0,  2,  3, 10,  0,  3,  4,  1,  6,  2,  0,  5,
         2,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 5, 0, 1, 2, 3, 3, 9, 2, 3, 1, 3, 0, 3, 0, 7, 1, 1, 2, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  0,  8,  0,  0,  0, 10,  7,  2,  0,  4,  2,  0,  5,  2,  2,  2,  5,
         1, 10])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 9, 4, 3, 4, 1, 3, 1, 1, 7, 0, 3, 2, 1, 0, 1, 2, 2, 0, 3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  2, 11,  1,  8,  8,  0,  0,  0,  9,  4,  1,  6,  6,  3,  2,  0,  2,
         0,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 8,  7,  5,  7,  2, 11,  2,  1,  2,  6,  3,  2,  2,  6,  2,  3,  2,  4,
         7,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  4,  3,  

epoch:0,batch:296,lr:0.001,loss:2.3257,mean_loss:2.3,mean_f1:0.071:   3%|▎         | 296/10000 [00:07<04:03, 39.85it/s]

tensor([1, 1, 6, 0, 1, 5, 5, 1, 0, 3, 0, 1, 5, 3, 0, 5, 3, 9, 0, 5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 1, 3, 0, 3, 7, 0, 3, 7, 2, 7, 6, 2, 4, 1, 1, 1, 4, 3, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 4, 2, 2, 9, 9, 3, 0, 4, 2, 1, 1, 0, 7, 3, 2, 4, 6, 2, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([7, 5, 7, 1, 5, 2, 0, 1, 6, 2, 3, 0, 0, 2, 2, 2, 0, 1, 7, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 5,  4,  1,  6,  0, 10,  1,  9,  4,  5,  3,  0,  1,  2,  2,  1,  7, 10,
         0,  6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 4, 1, 3, 5, 1, 8, 1, 7, 9, 4, 0, 5, 1, 8, 2, 1, 8, 0, 3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 8,  1,  2,  0,  5,  0,  1,  3,  4,  7,  3,  2, 10,  2,  2,  7,  0,  2,
         5, 11])
batch_text: torch.Size([20, 1000])
batch_label: 

epoch:0,batch:304,lr:0.001,loss:2.0003,mean_loss:2.299,mean_f1:0.072:   3%|▏       | 300/10000 [00:07<04:04, 39.71it/s]

tensor([ 9,  8,  2,  3,  1,  1,  1,  6,  0, 10,  2,  1,  0,  6,  4,  3,  3,  0,
         2,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  7, 11,  0,  3,  2,  4, 10,  2,  5,  3,  0,  6,  6,  0,  2,  6, 12,
         0,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  4,  4,  4,  3,  3,  1,  2,  2,  0,  4,  2,  8,  9,  8,  0, 10,  0,
         3, 11])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 4, 5, 2, 1, 2, 3, 0, 1, 1, 0, 8, 6, 1, 1, 2, 3, 2, 8, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([7, 2, 6, 3, 1, 3, 2, 0, 1, 1, 0, 4, 4, 1, 7, 0, 2, 2, 0, 7])
batch_text: torch.Size([20, 1000])
batch_label: tensor([12,  3,  7,  5,  7,  1,  5,  3,  1,  0,  2,  3,  8,  0,  3, 12,  0, 11,
         1,  5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 2, 6, 4, 0, 1, 1, 0, 2, 0, 2, 5, 7, 1, 2, 1, 8, 3, 5, 9])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 2, 3, 3, 0, 1, 3, 0, 4, 5, 2, 2, 2, 2, 1, 9, 4, 3, 1, 5])


epoch:0,batch:312,lr:0.001,loss:2.3235,mean_loss:2.298,mean_f1:0.072:   3%|▏       | 310/10000 [00:07<04:01, 40.15it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  3,  6,  5,  3,  3,  3,  1,  3,  3,  2,  0,  1,  2,  5,  5,  0,  4,
        10,  6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 3, 5, 1, 4, 0, 0, 3, 1, 0, 1, 0, 6, 8, 3, 3, 0, 8, 4, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 9, 12,  3,  1,  3, 10, 12,  2,  1,  1,  1,  0,  3,  1,  0,  7,  1,  2,
         4,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 9,  4,  2,  4,  6,  0,  8, 10,  7,  4,  1,  4, 11,  6,  5,  1,  0,  9,
         2,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  0,  3,  6,  3,  0,  1,  8,  1,  2,  2,  1,  3, 11,  1,  6,  2,  5,
         1,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 3, 2, 1, 4, 9, 1, 3, 2, 0, 8, 0, 1, 4, 1, 4, 1, 0, 0, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  6,  0,  3,  4,  4,  1, 10,  3,  9,  2,  2,  1,  2,  4,  0,  1,  0,
         2,  0])
batch_text: torch.Size([20, 1000])
batch_

epoch:0,batch:320,lr:0.001,loss:2.2658,mean_loss:2.298,mean_f1:0.072:   3%|▎       | 320/10000 [00:08<03:55, 41.04it/s]

tensor([ 3,  5, 11,  0,  5,  4,  2,  2,  1,  3,  4,  0,  5,  0,  4,  8,  0,  1,
         1,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 1, 2, 0, 6, 2, 2, 0, 3, 1, 2, 1, 0, 3, 1, 8, 4, 0, 1, 6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([8, 4, 3, 0, 0, 5, 0, 1, 1, 0, 6, 3, 7, 2, 5, 1, 2, 9, 2, 6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  0,  3,  1,  6,  6,  1, 11,  5,  6,  8,  5,  6,  0,  3,  3,  2,  0,
        11,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 9,  0,  8,  2,  3,  3,  4, 10,  4,  9,  3,  8,  0,  5,  9,  1,  2,  1,
        10,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  3,  0,  8,  4,  8, 10,  0,  2,  8,  0, 11,  1,  5,  2,  1,  8,  0,
         0,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 5,  8,  3,  3,  5,  2,  3,  2,  0,  4,  0,  2,  5,  1,  2,  2,  3, 11,
         2,  5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  2, 12,  6,  2,  7,  4,  4,  

epoch:0,batch:328,lr:0.001,loss:2.3842,mean_loss:2.298,mean_f1:0.072:   3%|▎       | 325/10000 [00:08<03:55, 41.16it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 9,  1,  1,  1,  2,  1,  5,  4,  1,  0,  0,  2,  0, 10,  0,  0,  5,  5,
         9,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  6,  0, 10,  3,  1,  1,  0,  8,  2,  3,  1,  0,  1,  2,  1,  7, 10,
         1,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([7, 1, 7, 0, 0, 7, 4, 2, 4, 4, 8, 8, 2, 7, 6, 1, 2, 0, 1, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  6,  1,  4,  1,  3,  7,  0,  4,  0,  0,  9,  0,  3,  1,  1,  0,  1,
         7, 10])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  7,  0,  7,  0,  9,  1,  7, 12, 12,  7,  5,  8,  5,  1,  4,  3,  0,
        10,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1, 13,  5,  2,  0,  3,  3,  5,  6,  5,  1,  2,  0,  0,  9,  3,  1,  4,
         3,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 4, 3, 4, 1, 3, 3, 2, 1, 1, 1, 0, 0, 3, 0, 8, 0, 2, 9, 6])
batch_text: torch.Size([20, 1000])
batch_

epoch:0,batch:336,lr:0.001,loss:2.0564,mean_loss:2.298,mean_f1:0.072:   3%|▎       | 335/10000 [00:08<03:52, 41.55it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 7, 1, 4, 0, 5, 1, 5, 0, 3, 4, 2, 7, 4, 3, 0, 0, 6, 4, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([11,  7,  2,  0,  7,  0,  3,  1,  8,  1,  0,  1,  0,  1,  3,  8,  6,  0,
         0,  6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2, 10,  2,  2,  3,  6,  1, 10,  2,  1,  8,  2,  2,  1,  4,  7,  1,  6,
         1,  7])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  2,  6,  1,  4,  9,  7,  8,  4,  0,  3,  7,  3,  4,  6, 11,  3,  0,
         6,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 5,  2,  5,  1,  5,  0,  3,  1,  6,  2,  7,  0,  7,  1,  9, 10,  1,  0,
         1,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1, 10,  0,  9,  0,  1,  4, 10, 13,  4,  2,  0,  0,  7,  2,  2,  1,  1,
         0,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 2, 1, 0, 1, 0, 1, 0, 3, 0, 1, 2, 1, 9, 4, 0, 2, 0, 1, 4])
batch_text: torch.Size([20, 1000])
batch_

epoch:0,batch:344,lr:0.001,loss:2.0792,mean_loss:2.297,mean_f1:0.073:   3%|▎       | 340/10000 [00:08<03:56, 40.82it/s]

tensor([ 0,  2, 10,  5, 10,  1,  0,  0,  1,  4,  5, 10,  2,  3,  2,  7,  2,  1,
         9,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  1,  5,  3,  3,  2,  2,  2,  1,  4,  5, 10,  5,  1,  4,  7,  1,  4,
         5,  6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  2,  6,  1,  0,  0,  2, 12,  0,  0,  1,  2,  2,  1,  1,  8,  9,  2,
        10,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 5,  2,  1,  1,  4,  3,  1,  0,  0,  6,  4,  4, 13,  1,  4,  1,  3,  1,
         1,  5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  8,  4,  2,  3,  3,  3,  0,  0, 12,  0,  0,  4,  1,  9,  0,  0,  4,
         1,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  6,  2,  7,  1,  1,  1,  0, 10,  2,  0,  7,  2,  7,  1,  8,  9,  8,
         3,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 8,  4,  1,  7,  2,  5,  8,  1,  0,  0,  0,  1,  4,  7,  0, 10,  1,  1,
         3,  2])
batch_text: torch.Size([20, 1000]

epoch:0,batch:352,lr:0.001,loss:2.0588,mean_loss:2.295,mean_f1:0.072:   4%|▎       | 350/10000 [00:08<03:56, 40.88it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([7, 2, 0, 2, 3, 6, 1, 0, 0, 4, 4, 3, 6, 0, 0, 0, 2, 0, 1, 5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  3,  0,  0,  0,  1,  1,  0,  4,  5,  2, 12,  3,  5, 10,  6,  9,  8,
         1,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  1,  5,  0,  2,  2,  4,  5,  6,  2,  3, 10,  4,  2, 11,  0,  3,  3,
         0,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 0, 2, 1, 3, 0, 1, 0, 2, 0, 3, 1, 0, 1, 7, 8, 1, 1, 0, 3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  8,  2,  1,  1,  2,  3,  0,  8,  0,  8,  0,  4,  8, 11,  2,  6,  4,
         0,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  2,  7,  1,  4, 10,  3,  2,  0,  0,  1,  2,  8,  0,  1,  3,  0,  3,
         1,  8])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  3,  0,  6,  7, 10,  0,  0,  2,  4,  4,  8,  2,  0,  3,  2,  0,  7,
         4,  4])
batch_text: torch.Size([20, 1000])
batch_

epoch:0,batch:360,lr:0.001,loss:2.4203,mean_loss:2.293,mean_f1:0.072:   4%|▎       | 360/10000 [00:09<03:57, 40.65it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([10,  0,  8,  2,  2,  0,  2,  5,  4,  1,  1,  0,  2,  1,  1,  2,  4,  5,
         8,  9])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 3, 6, 5, 1, 0, 3, 0, 5, 0, 0, 4, 3, 7, 3, 0, 0, 3, 0, 4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 7,  0,  3,  3,  3,  2,  5,  9,  4, 11,  6,  0,  2,  2,  2,  2,  6,  5,
         0,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 0, 7, 1, 8, 3, 0, 4, 2, 2, 2, 0, 1, 0, 0, 2, 1, 2, 4, 4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  0,  1,  9,  7,  2,  7, 11,  3,  3,  1,  4,  2,  7,  3,  1,  3, 11,
         9, 10])
batch_text: torch.Size([20, 1000])
batch_label: tensor([5, 1, 2, 6, 2, 3, 1, 0, 0, 0, 4, 1, 0, 1, 2, 1, 5, 6, 4, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 1, 4, 1, 0, 6, 1, 0, 3, 2, 3, 1, 1, 0, 3, 2, 0, 0, 1, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([12,  5,  1,  3,  6,  0,  1,  8,  2,  6,  

epoch:0,batch:368,lr:0.001,loss:2.3165,mean_loss:2.291,mean_f1:0.072:   4%|▎       | 365/10000 [00:09<03:56, 40.80it/s]

tensor([6, 1, 3, 2, 2, 2, 0, 8, 2, 2, 1, 0, 0, 0, 5, 3, 0, 2, 4, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  4,  5,  1,  0,  0,  8,  0,  3,  3,  0,  6,  1,  5,  5,  4, 10, 12,
         0,  5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 0, 3, 1, 2, 5, 1, 2, 7, 3, 0, 2, 9, 0, 7, 2, 2, 0, 0, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 6,  1,  0,  5,  1,  2,  4,  0,  1,  0, 12,  1,  8,  6,  0,  0,  0,  2,
         4, 11])
batch_text: torch.Size([20, 1000])
batch_label: tensor([8, 2, 1, 4, 0, 1, 2, 3, 3, 2, 0, 2, 1, 0, 0, 3, 0, 0, 3, 5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  2,  0,  0,  2,  6,  7,  1, 10,  0,  0,  4,  7,  1, 11,  0,  2,  8,
         2,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 5,  1,  0,  3,  7, 10,  0,  2,  0, 10,  0,  1,  1,  8,  0,  2,  4,  5,
         2,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  1,  3,  7,  0,  1,  4,  5, 12, 10,  2,  2,  1,  2,  0,  

epoch:0,batch:376,lr:0.001,loss:2.469,mean_loss:2.289,mean_f1:0.072:   4%|▎        | 375/10000 [00:09<03:58, 40.40it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  1,  4,  0,  2,  3,  0, 10,  3,  2,  2,  3,  0,  3,  1,  3,  2,  0,
         4,  9])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 0, 6, 8, 1, 0, 2, 0, 1, 3, 8, 2, 2, 5, 2, 0, 1, 2, 0, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  0,  2,  3,  1,  1,  9,  1, 10,  0,  1,  0,  1, 10,  4,  0,  0,  6,
         1,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  4, 12,  0,  6,  0,  1,  8, 10,  1,  0,  9,  2,  0,  5,  1,  1,  4,
         2,  8])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  6,  1, 12,  2,  2, 13,  9,  8,  0,  1,  0,  2,  2,  1,  3,  1,  5,
         1,  9])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  2,  2,  2,  3,  1,  2,  5,  3,  1,  1, 10,  0,  2,  1,  5,  5,  0,
         2,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 3, 4, 2, 0, 3, 0, 2, 2, 8, 0, 2, 6, 3, 3, 1, 2, 0, 9, 1])
batch_text: torch.Size([20, 1000])
batch_

epoch:0,batch:384,lr:0.001,loss:2.4976,mean_loss:2.29,mean_f1:0.072:   4%|▎        | 380/10000 [00:09<04:00, 40.04it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  1,  1,  0,  0,  3, 12,  1,  7, 10,  1,  0,  1,  2,  1,  8,  2,  3,
         1,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  1,  1,  0, 10,  0,  0,  2,  0,  3,  3,  1,  4,  4, 11,  0,  4, 11,
         6,  9])
batch_text: torch.Size([20, 1000])
batch_label: tensor([13,  7,  2,  1,  1,  9,  8,  7,  0, 10,  1,  1,  6,  4,  0,  3,  3,  6,
         3,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([12,  2,  8,  1,  0,  6, 10,  0,  2,  6,  0,  0,  6,  1,  9,  9,  9,  2,
         2, 12])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 1, 2, 7, 0, 0, 6, 7, 0, 2, 1, 8, 2, 6, 0, 6, 2, 7, 1, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 3, 3, 1, 0, 2, 4, 2, 3, 3, 3, 5, 0, 9, 5, 0, 0, 1, 0, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  0,  2,  6,  4,  3,  3, 10,  2,  0, 11,  1,  1,  1,  7,  2,  3,  0,
         2,  6])
batch_text: torch.Size([20, 1000])
batch_

epoch:0,batch:391,lr:0.001,loss:2.0983,mean_loss:2.292,mean_f1:0.071:   4%|▎       | 390/10000 [00:09<04:09, 38.54it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 7,  0,  9,  3,  3, 10, 12,  2,  3,  1,  8,  3,  6, 12,  0, 12,  8,  2,
         8,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 5,  3,  1,  0,  3,  8,  0,  1,  2, 10,  0,  6,  6,  4,  6,  8,  8,  1,
         1,  6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 9,  7, 11,  3,  5,  3,  0,  0, 12, 10,  8,  1,  2,  6,  4,  0,  2,  1,
         3,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 2, 2, 1, 0, 2, 3, 6, 6, 6, 3, 3, 4, 6, 8, 8, 1, 6, 7, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([11,  4,  5,  6,  0,  3,  4,  1, 12,  1,  1,  0,  3,  0,  3,  8,  4,  0,
         1,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  1,  0,  3,  5,  2,  2,  0,  5,  0,  0,  2,  0,  0,  0, 12,  0,  5,
         2, 10])
batch_text: torch.Size([20, 1000])
batch_label: tensor([5, 1, 6, 0, 4, 3, 2, 3, 1, 0, 8, 0, 1, 0, 0, 6, 1, 2, 7, 0])


epoch:0,batch:399,lr:0.001,loss:2.4097,mean_loss:2.293,mean_f1:0.072:   4%|▎       | 400/10000 [00:10<04:01, 39.67it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  0,  2,  1,  1,  0,  5,  2,  2,  5,  4,  7,  4,  2,  0,  2,  1,  0,
         2, 13])
batch_text: torch.Size([20, 1000])
batch_label: tensor([11,  5,  0,  1,  2,  1,  3,  0,  2,  1,  2,  0,  5,  1,  2,  6, 11, 10,
         0,  5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 4, 1, 2, 5, 2, 7, 0, 1, 4, 7, 1, 9, 0, 2, 3, 1, 7, 2, 7])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  0,  1,  3,  4,  2,  2,  8,  3,  1,  1, 10,  1, 12,  4,  0,  1,  1,
         5,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 7, 2, 2, 1, 0, 9, 2, 5, 1, 0, 7, 0, 0, 2, 7, 7, 3, 0, 9])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 5,  2,  5,  2, 12,  0,  2,  5,  7,  4,  4,  2,  1,  4,  1,  0, 12,  2,
         2,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 3, 5, 2, 7, 4, 1, 6, 2, 7, 3, 0, 7, 7, 3, 7, 0, 0, 0, 5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  3,  5,  

epoch:0,batch:407,lr:0.001,loss:2.2526,mean_loss:2.294,mean_f1:0.071:   4%|▎       | 404/10000 [00:10<04:03, 39.48it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  8,  6,  3,  7,  7,  1,  8,  8,  4,  1,  0,  4,  6,  0,  2,  1,  7,
         7, 10])
batch_text: torch.Size([20, 1000])
batch_label: tensor([10,  3,  1,  6,  1,  5,  9,  3,  4, 10,  7,  0,  8,  1,  2,  1,  1,  8,
         1,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  5,  6,  1,  1,  0,  1,  4,  1,  6,  3,  1,  6,  6,  1, 10,  4,  3,
         1,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  2, 11,  0,  0,  2,  2,  1,  5,  4,  7,  1,  5, 10,  3,  0,  4,  3,
         3,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([10,  0,  4,  1,  1,  3,  0,  1,  1,  2,  5,  8,  4,  5,  8,  7,  0,  0,
         2,  8])
batch_text: torch.Size([20, 1000])
batch_label: tensor([4, 1, 5, 0, 2, 7, 7, 3, 1, 2, 0, 4, 1, 0, 8, 7, 1, 5, 4, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  5,  9,  0,  2,  0,  3, 11,  3,  2,  3,  1,  0,  1,  3, 10,  6,  1,
         9,  7])
batch_text: t

epoch:0,batch:415,lr:0.001,loss:2.5474,mean_loss:2.294,mean_f1:0.071:   4%|▎       | 414/10000 [00:10<03:58, 40.19it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([12,  3, 11,  7,  2,  5,  0,  4,  4,  8,  2,  3,  1, 11,  3,  3,  1,  2,
         1,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  0,  1,  1,  2,  2, 10,  0,  1,  6,  1,  0,  1,  2,  1,  3,  1,  7,
         2,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 1, 0, 5, 0, 3, 1, 2, 0, 0, 0, 7, 7, 9, 2, 2, 7, 2, 4, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([11,  3,  1,  7,  0,  3,  1,  0,  7,  5,  0,  0,  1,  4,  6,  4,  7,  0,
        11,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  3,  0,  1, 10,  1,  0,  5,  0,  3,  4,  1,  7,  7,  6, 11,  4,  0,
         3,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  0,  1,  1,  6,  0,  5,  3,  2,  2,  1,  1,  3,  2, 10,  2,  2,  0,
         7,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 8,  0,  1, 12,  7,  0, 12, 10,  0,  4,  8,  5,  2,  2,  0,  3,  2,  4,
         3, 10])
batch_text: t

epoch:0,batch:423,lr:0.001,loss:2.4141,mean_loss:2.295,mean_f1:0.071:   4%|▎       | 419/10000 [00:10<03:59, 40.01it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 8,  8,  9,  0,  5,  1,  7,  0,  7,  1,  2,  2,  2, 11,  4,  3,  1,  0,
         2,  7])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  3,  4,  2,  0,  7,  6,  1,  7,  1,  2,  7,  4, 11,  9,  3,  1,  1,
         2,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 1, 9, 4, 2, 7, 8, 3, 2, 7, 6, 3, 0, 0, 1, 3, 4, 1, 2, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  6,  2,  9,  6, 10,  1,  9,  4,  9,  1,  0,  2,  5,  1, 12,  7,  3,
         2,  5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 2, 1, 2, 1, 0, 7, 3, 2, 0, 2, 2, 6, 2, 1, 2, 1, 3, 4, 6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 8,  9,  1,  1,  0, 11,  3,  2,  2,  5, 11,  3,  5,  2,  3,  2,  1,  3,
         3,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 6,  5,  5,  2,  0,  4,  0,  4,  3,  5,  0,  8,  0,  3,  3,  4,  7, 10,
         5,  2])
batch_text: torch.Size([20, 1000])
batch_

epoch:0,batch:431,lr:0.001,loss:2.1167,mean_loss:2.293,mean_f1:0.071:   4%|▎       | 428/10000 [00:10<04:01, 39.62it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3, 11, 11,  3,  1,  4,  5,  0,  1,  4,  3,  0,  2,  0,  0,  2,  0,  2,
         2, 10])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 6,  4,  1,  4,  2,  0,  0,  0,  0,  9,  1, 11,  2,  2,  2,  0,  0,  1,
         4,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 0, 1, 1, 0, 0, 1, 2, 3, 5, 0, 7, 6, 7, 0, 1, 2, 8, 0, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([4, 2, 0, 5, 2, 2, 7, 1, 0, 4, 3, 0, 9, 0, 2, 2, 3, 4, 6, 3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  2,  1,  3, 13,  0,  5,  7,  2,  2,  6,  0,  2,  2,  3,  1,  8,  5,
         2,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4, 10,  3,  4,  1,  3,  4,  6,  3,  1,  5,  0,  3,  7,  2,  0,  4,  6,
         5,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([4, 3, 0, 4, 4, 1, 0, 7, 3, 1, 7, 9, 6, 0, 0, 1, 1, 5, 2, 3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 2, 3, 2, 5

epoch:0,batch:438,lr:0.001,loss:2.1606,mean_loss:2.293,mean_f1:0.071:   4%|▎       | 437/10000 [00:11<04:02, 39.48it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 5,  0,  9, 10,  6,  2,  1,  3,  1,  4,  0,  1,  4,  1,  7,  1,  1,  5,
         0,  6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  0,  4,  1,  2,  0, 13,  1,  2, 12,  8,  1,  6,  6,  3,  1,  1,  9,
         2,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  0,  3,  3,  2,  1,  1,  3,  4,  0,  3,  8,  0,  2,  2, 10,  3,  7,
         1,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  2,  2,  0,  8,  7,  0,  1,  1,  0,  9,  2, 10,  3,  4, 10,  3,  0,
         0,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1, 10,  0,  5,  1,  4,  3,  0,  1,  0,  4,  8,  0,  3,  0,  0,  1,  8,
         8,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([9, 2, 0, 2, 2, 1, 0, 6, 9, 1, 0, 8, 9, 6, 1, 0, 9, 1, 0, 9])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  2,  1, 11,  4,  1,  1,  0,  9,  9,  3,  1,  2,  3,  2,  1,  4,  6,
         2,  2])
batch_text: t

epoch:0,batch:446,lr:0.001,loss:2.3651,mean_loss:2.293,mean_f1:0.072:   4%|▎       | 446/10000 [00:11<04:00, 39.69it/s]

tensor([1, 1, 3, 3, 5, 5, 2, 3, 0, 6, 3, 3, 8, 2, 6, 1, 3, 2, 2, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  2,  0,  2,  1,  0,  0,  1,  3,  1,  5, 10,  2,  1, 10,  1,  3,  2,
         0,  7])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  2,  0, 12, 10,  0,  0,  4,  0,  2,  3,  4,  0,  1,  0,  5,  4,  5,
         2,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 2, 3, 0, 2, 1, 2, 4, 2, 4, 2, 8, 2, 7, 5, 7, 5, 4, 1, 8])
batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 4, 5, 4, 8, 0, 0, 3, 1, 1, 5, 4, 0, 6, 1, 0, 2, 2, 2, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 8,  5,  6,  2,  0,  0,  4,  6,  4, 10,  0,  3,  1,  8,  7,  3,  2,  2,
         5,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  0,  1,  5,  7,  2,  5,  1,  2,  1,  2,  2,  8,  1,  0,  1,  5,  2,
         8, 11])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3, 12, 11,  7,  0,  0,  9,  3,  2,  6,  1,  5,  2,  8,  0,  

epoch:0,batch:454,lr:0.001,loss:2.4551,mean_loss:2.292,mean_f1:0.072:   5%|▎       | 451/10000 [00:11<03:57, 40.20it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  3,  3,  3,  1,  0,  4,  4,  3,  2,  1, 11,  6,  1,  8,  2,  8,  1,
         7,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  9,  5,  1,  1,  1,  4,  4,  4, 10,  8,  7,  0,  0, 12,  3,  4,  0,
         8,  5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 8,  1,  4, 12,  2,  0,  1,  2,  2,  9,  1,  1,  7,  4,  3,  1,  2,  2,
         4,  5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 5,  4,  2,  0,  3, 10,  0,  5,  3,  8,  5,  1,  0,  7,  3,  1,  0,  1,
         8,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  4,  2,  1,  1,  3,  6,  5,  4, 10, 10,  2,  2,  5,  5,  8,  2,  1,
         0,  5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([7, 2, 1, 0, 2, 7, 2, 2, 3, 0, 0, 3, 8, 1, 1, 4, 2, 2, 0, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 1, 1, 0, 2, 4, 5, 1, 1, 2, 5, 8, 1, 2, 4, 3, 1, 0, 3, 2])
batch_text: torch.Size([20, 1000])
batch_

epoch:0,batch:461,lr:0.001,loss:2.1609,mean_loss:2.29,mean_f1:0.072:   5%|▍        | 461/10000 [00:11<04:03, 39.14it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 1, 1, 2, 0, 3, 0, 2, 3, 1, 0, 7, 2, 2, 2, 0, 0, 3, 1, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([10, 13,  0,  0,  0,  6,  1,  8,  2,  2,  6,  2,  2,  9,  0,  3,  2,  7,
         1,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  6, 13,  3,  0,  8,  5,  0,  9,  2,  6,  3,  5,  4,  0,  9, 10,  1,
         3,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([10,  4,  4,  3,  2,  1,  2,  9,  3,  0,  1,  1,  5,  9,  7,  9,  0,  2,
         0,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 5, 1, 3, 0, 1, 1, 0, 1, 1, 0, 1, 8, 2, 2, 1, 2, 2, 0, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 2, 1, 0, 2, 6, 8, 2, 2, 0, 2, 0, 4, 5, 1, 2, 0, 0, 0, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  2,  7,  2,  0,  0, 11,  0,  7,  9,  6,  0,  0,  3,  0,  1,  1,  1,
         4,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 8,  3,  0,  

epoch:0,batch:469,lr:0.001,loss:2.4799,mean_loss:2.29,mean_f1:0.072:   5%|▍        | 469/10000 [00:11<04:03, 39.20it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  1, 10,  2,  2,  6,  3,  1,  0,  9,  1,  1,  1,  4,  8,  1,  3,  0,
         2,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 8,  4,  7,  0,  1,  8,  5,  5,  0,  1,  0,  5,  0,  2,  0,  1,  0,  0,
        11,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  1,  1,  0, 10,  0, 11,  0,  0,  4,  6,  1,  2,  8,  1,  3, 10,  0,
         2,  9])
batch_text: torch.Size([20, 1000])
batch_label: tensor([5, 1, 0, 0, 8, 0, 1, 5, 2, 6, 4, 0, 8, 2, 2, 3, 3, 1, 0, 8])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3, 10,  6,  6, 10,  1,  0,  0,  1,  6,  0,  0,  3,  0,  4,  2,  3,  0,
         0,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  3,  9,  3,  5,  7,  3,  0,  2,  0,  0,  2,  0,  1,  2, 10, 10,  2,
         6,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  4,  3,  0,  9,  1,  1,  3, 11,  7,  0,  2,  1,  2,  6,  1, 13,  5,
         0,  5])
batch_text: t

epoch:0,batch:477,lr:0.001,loss:1.9412,mean_loss:2.29,mean_f1:0.071:   5%|▍        | 473/10000 [00:12<04:04, 38.89it/s]

tensor([5, 3, 0, 1, 4, 0, 6, 2, 0, 5, 0, 0, 5, 2, 2, 0, 7, 2, 1, 6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  3,  2,  1,  3,  5, 11,  1,  0,  5,  5,  0,  4,  5,  9,  1,  0,  0,
         7,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  6, 12, 11,  4,  2,  5,  2,  5,  6,  3,  7,  0,  8,  2,  1,  6,  1,
         9,  8])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 1, 3, 8, 1, 0, 1, 2, 2, 0, 3, 0, 2, 0, 1, 4, 2, 3, 6, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  3,  1,  2,  1,  1,  0,  3,  2,  7,  9,  2,  0, 10,  5,  6,  1, 12,
         5,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([8, 9, 5, 7, 5, 2, 6, 2, 8, 4, 0, 1, 5, 2, 1, 0, 0, 6, 5, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 5,  0,  2,  1,  9,  9,  6,  1,  5,  0,  6,  2, 11,  0,  3,  0,  5,  7,
         1,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 6,  1,  1,  0,  3,  2,  0,  0,  2, 10,  7,  2,  1,  3,  1,  

epoch:0,batch:485,lr:0.001,loss:2.5103,mean_loss:2.29,mean_f1:0.071:   5%|▍        | 483/10000 [00:12<03:56, 40.23it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  0,  0,  1, 10,  2,  1,  1,  3,  2, 11,  6,  5,  2,  4,  0,  0,  1,
         0,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  0,  2,  5,  1,  1,  0,  2,  0, 13,  2,  1,  7,  5,  1,  1,  1,  1,
         1,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  5,  8,  8,  3,  1,  2,  3, 10,  0,  5,  2,  0,  0,  8,  2,  8,  5,
         1, 11])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 6,  4,  1,  3, 10,  0,  0,  1,  0,  1,  1,  5, 11, 10,  1,  1,  0,  5,
         7,  7])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  2,  1,  2,  0,  0,  0,  3,  7,  2,  2,  3,  0,  2, 12,  0,  0,  0,
         1,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  1, 10,  6,  5,  1,  1,  4,  1,  2,  5,  9,  5,  1,  4,  1,  0,  6,
         0,  5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([7, 2, 5, 9, 3, 1, 8, 9, 6, 5, 1, 7, 2, 0, 1, 3, 4, 7, 1, 2])
batch_text: t

epoch:0,batch:493,lr:0.001,loss:2.4136,mean_loss:2.291,mean_f1:0.071:   5%|▍       | 493/10000 [00:12<03:57, 39.98it/s]

tensor([ 1,  0, 11,  1,  5,  2,  3,  4,  1,  6,  1, 11,  0,  9,  7,  2,  6,  0,
        10,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([12,  8,  0,  2,  3,  0,  2,  3,  9,  2,  3,  1,  8,  3,  1,  2,  0,  1,
        12,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 3, 5, 0, 1, 1, 3, 1, 4, 5, 3, 2, 5, 4, 0, 4, 2, 2, 1, 3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  3,  2,  3,  1,  1,  7,  9,  1,  2,  2,  5,  0,  9,  2,  4,  2,  1,
         0, 10])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 1, 1, 0, 5, 1, 2, 0, 0, 2, 4, 0, 2, 0, 1, 2, 2, 1, 0, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 9,  0,  0,  0,  1,  3,  2,  2,  4,  0,  5,  0, 10,  4, 13,  2,  2,  8,
         2,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 5,  7,  2,  4,  5, 10,  4,  0,  4,  7,  7,  9,  6,  0,  2,  4,  1,  7,
         5,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([11,  3,  5,  2,  5,  8,  0,  0,  

epoch:0,batch:500,lr:0.001,loss:2.4733,mean_loss:2.29,mean_f1:0.071:   5%|▍        | 501/10000 [00:12<04:01, 39.33it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  1,  4,  4, 10,  0,  0,  3,  0,  3,  0,  0,  0,  2,  1,  0,  3,  2,
         2, 10])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2, 10,  2, 10,  1,  5,  3,  9,  1,  4,  2,  2,  2,  6,  2,  2,  3,  3,
         0, 10])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 7,  5,  0, 10,  0,  2,  3,  9, 13,  1,  2,  2,  3,  1, 11,  0,  2,  1,
         0,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([8, 0, 8, 0, 6, 3, 0, 1, 1, 5, 1, 7, 2, 8, 1, 1, 0, 5, 2, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  8,  9,  0,  5,  0,  1,  0,  1,  0,  0,  7,  1,  3, 10,  0,  2,  0,
         4,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 6, 5, 0, 8, 5, 4, 3, 1, 2, 1, 9, 5, 1, 4, 0, 1, 4, 5, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1, 13,  0,  1,  5,  5,  5,  1,  4,  0,  3,  5,  4,  5,  7,  9,  1,  8,
         1,  3])
batch_text: torch.Size([20, 1000])
batch_

epoch:0,batch:508,lr:0.001,loss:2.3006,mean_loss:2.29,mean_f1:0.071:   5%|▍        | 505/10000 [00:12<04:00, 39.43it/s]

tensor([0, 2, 7, 8, 5, 5, 1, 4, 2, 4, 3, 0, 3, 7, 8, 3, 3, 0, 3, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  3, 13,  3,  3,  1, 10,  6,  1,  7,  3,  2,  0,  2,  0,  3,  3,  3,
         9,  8])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  0,  4,  3,  2,  1,  1,  1,  2,  5,  1, 10,  1,  0,  2,  1,  7,  3,
         3,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  3,  3,  2,  0,  2,  0,  2,  1,  1,  3,  0,  8,  4,  8,  2,  3,  8,
         0, 10])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  6,  3,  7,  5,  7, 11,  3,  5,  0,  2,  2,  2,  1,  0,  5,  9,  0,
         0,  9])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 7,  0,  2,  9, 13,  2,  7,  0,  3,  1,  2,  3,  1,  3, 11,  4,  8,  0,
         0,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 1, 4, 2, 2, 3, 5, 2, 3, 9, 0, 2, 1, 1, 1, 3, 1, 2, 0, 3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([10,  6,  3,  2,  1,  3,  2,  1,  

epoch:0,batch:516,lr:0.001,loss:2.1612,mean_loss:2.289,mean_f1:0.071:   5%|▍       | 514/10000 [00:13<03:59, 39.56it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  0,  2,  5,  5,  1,  4,  7,  2,  4,  0,  5,  2,  6,  1,  1, 10,  1,
         4,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 4, 1, 0, 2, 2, 0, 0, 4, 2, 5, 3, 0, 4, 4, 1, 3, 2, 1, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 7,  1, 11,  0,  1,  2,  0,  0,  1,  0,  2,  8,  5,  5,  0,  1,  2,  5,
         1,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4, 11,  3,  4,  1,  0, 10,  7,  0,  9,  0,  9,  2,  0,  2,  0,  1,  9,
         7,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([4, 1, 8, 9, 1, 3, 6, 1, 2, 2, 5, 7, 1, 0, 8, 2, 5, 4, 9, 3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  1, 11,  1,  2,  8,  2,  4,  5,  9,  2,  2,  2,  2,  5,  2,  1,  0,
         0,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  5,  4,  5,  3, 12,  5,  7,  1, 10, 10,  1,  0,  7,  2,  1, 10,  5,
         1,  1])
batch_text: torch.Size([20, 1000])
batch_

epoch:0,batch:524,lr:0.001,loss:2.4032,mean_loss:2.289,mean_f1:0.072:   5%|▍       | 524/10000 [00:13<03:55, 40.24it/s]

tensor([ 7, 10,  3,  8,  0,  3,  7,  3,  8,  1,  2,  7,  0,  2,  2,  0,  1,  4,
         1,  7])
batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 2, 5, 0, 2, 2, 2, 6, 3, 1, 0, 2, 1, 0, 4, 2, 2, 3, 0, 5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  0,  7,  5,  1, 11,  1,  5,  1,  0,  8,  2,  3,  2,  0,  1,  2,  0,
         2,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  0,  1,  4,  0,  2, 12, 10,  5,  1,  1,  6,  1,  0,  2,  0,  2,  0,
        10,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  0,  0,  4,  1, 10,  6,  4, 10, 11,  5, 10,  7,  3,  4,  2, 10,  1,
         1,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  5,  3,  5,  2,  9, 10,  0, 12,  1,  2,  9, 10,  1,  3,  0,  0,  0,
         2,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 5, 5, 3, 7, 1, 3, 2, 1, 2, 7, 8, 3, 2, 0, 1, 1, 1, 3, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  9,  3,  7,  2,  5, 12,  0,  

epoch:0,batch:532,lr:0.001,loss:2.225,mean_loss:2.289,mean_f1:0.072:   5%|▍        | 529/10000 [00:13<03:55, 40.20it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 5,  2,  0,  5,  0,  1,  0,  1,  3,  0,  0,  0,  6,  1,  5, 12,  5,  0,
         2,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 5,  6,  1, 11,  3,  4,  7,  1,  0,  0,  2,  6, 10, 12,  1,  8,  1,  0,
        10,  7])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 5,  2,  4,  4,  6,  0,  3,  0, 10,  1,  1,  3,  7,  0,  0,  1,  3,  1,
         7,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  7,  6, 10,  0, 12,  6,  8,  3,  1,  8,  2,  2,  2,  0,  0,  6,  3,
         3,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([9, 5, 2, 1, 4, 3, 2, 2, 4, 3, 4, 3, 1, 6, 2, 9, 2, 5, 3, 5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  4,  1,  0,  2,  3,  2,  8,  1,  5,  4, 11, 11,  0,  1,  0,  9,  1,
         0,  5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 9,  0,  2,  2,  3,  0,  0,  1,  3,  4,  6,  0,  8,  5,  2,  1, 11,  2,
        10,  6])
batch_text: t

epoch:0,batch:540,lr:0.001,loss:2.1313,mean_loss:2.287,mean_f1:0.072:   5%|▍       | 539/10000 [00:13<03:51, 40.82it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([5, 6, 1, 3, 2, 4, 1, 0, 0, 0, 5, 2, 7, 0, 1, 2, 0, 2, 1, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 7,  3,  0,  2,  3,  3,  2,  0, 10,  1,  2,  3,  1,  0,  1,  7,  3,  6,
         0,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  7,  2,  0,  1,  1, 11,  1,  2,  1,  8,  2,  3,  5,  8,  5,  7,  1,
         8,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([13,  4,  1, 11,  3,  1,  1,  2,  1,  1, 10,  2,  2,  0,  0,  0,  2,  0,
         2,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 2, 3, 1, 3, 0, 8, 4, 1, 2, 1, 0, 5, 4, 1, 0, 2, 2, 2, 6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([8, 1, 0, 0, 1, 1, 3, 2, 0, 4, 3, 3, 0, 1, 1, 2, 4, 4, 3, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([12,  1,  3,  8,  9,  3,  6,  2,  2,  1,  1,  0,  4,  1,  1,  1,  2,  1,
         2, 13])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 7,  2,  3,  

epoch:0,batch:547,lr:0.001,loss:2.2499,mean_loss:2.287,mean_f1:0.072:   5%|▍       | 544/10000 [00:13<03:53, 40.47it/s]

tensor([1, 0, 2, 3, 2, 9, 1, 6, 2, 0, 3, 0, 3, 4, 7, 1, 0, 0, 2, 6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 4, 1, 7, 6, 3, 3, 4, 1, 7, 7, 0, 7, 1, 5, 0, 5, 3, 1, 9])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 2, 1, 5, 5, 1, 0, 7, 4, 7, 0, 1, 0, 3, 2, 1, 1, 2, 7, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 6,  6,  3,  4,  0,  0,  5,  7,  0,  3,  1,  2,  2,  4, 11,  2,  4,  0,
         0,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 4, 0, 0, 0, 2, 9, 3, 6, 0, 7, 0, 3, 0, 2, 2, 0, 4, 8, 7])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  1,  7,  5,  2,  6,  6,  1,  0,  9,  1,  6,  1,  3,  4,  0, 11,  7,
         1, 13])
batch_text: torch.Size([20, 1000])
batch_label: tensor([10,  8,  0,  3,  3, 10,  0,  2,  5,  1,  0,  0,  0,  0, 12,  4,  0,  0,
         1,  0])


epoch:0,batch:555,lr:0.001,loss:2.0315,mean_loss:2.286,mean_f1:0.072:   6%|▍       | 554/10000 [00:14<04:01, 39.17it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  1,  3,  0,  1,  2,  0, 12,  4,  3,  9,  8,  4,  1,  1,  2,  0,  0,
         4,  7])
batch_text: torch.Size([20, 1000])
batch_label: tensor([6, 1, 4, 3, 1, 3, 1, 0, 2, 2, 5, 3, 3, 1, 8, 6, 1, 9, 0, 3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  2,  3,  4,  2,  1,  1,  1,  9,  2,  8, 10,  4,  0,  5,  9,  4,  2,
         0,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  1,  1,  5,  1,  5,  0, 10,  3,  3,  1,  0,  4,  0,  0,  1,  6,  0,
         0,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  1,  1,  0,  3,  0,  0,  2,  2,  1,  4, 10,  8,  3,  3,  2,  0,  7,
        10,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 6, 12,  5,  1,  2,  4,  1,  1,  0,  4,  0,  7,  0,  2,  4,  2,  5,  8,
         5,  7])
batch_text: torch.Size([20, 1000])
batch_label: tensor([5, 7, 0, 1, 0, 2, 1, 7, 2, 2, 8, 7, 5, 0, 1, 2, 3, 3, 6, 1])
batch_text: torch.Size([20, 1000])
batch_

epoch:0,batch:563,lr:0.001,loss:2.4292,mean_loss:2.286,mean_f1:0.072:   6%|▍       | 559/10000 [00:14<03:58, 39.63it/s]

tensor([ 0,  5,  0,  3,  6,  7,  0,  9,  4,  1,  1, 11,  3,  2,  1,  1,  1,  1,
         4,  6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  0,  0,  7,  0,  7,  9,  0,  6,  0,  1, 11,  5,  1,  1,  3,  1,  0,
         1,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  6,  0,  0, 10,  8,  2, 10,  2,  3,  7,  5,  3,  1,  3,  2,  2,  9,
         2,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  0,  0,  5,  2,  0,  1,  4,  3,  3,  3,  0,  0,  0, 10, 12,  0,  0,
         0,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 7,  6,  8,  0,  1,  8,  1,  3,  4,  1,  1, 12,  1,  2,  9,  2, 11, 11,
         3,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  4,  1,  1,  4,  0, 10,  4, 13,  1,  1,  5,  6,  5,  3,  1,  5,  1,
         1,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  3,  1,  1,  0,  1,  0, 10,  4, 10,  0,  1,  4,  0, 10,  0,  5,  2,
         7, 10])
batch_text: torch.Size([20, 1000]

epoch:0,batch:571,lr:0.001,loss:2.2073,mean_loss:2.287,mean_f1:0.072:   6%|▍       | 569/10000 [00:14<03:55, 39.97it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 9,  1,  2,  9,  4,  1,  8,  1,  8,  7,  5,  0,  9, 13,  2,  5,  5,  0,
         3,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  0,  1,  7,  8,  5,  7,  7,  1, 10,  2,  2,  2,  3,  1,  2,  5,  2,
         2,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  0,  6,  2,  1,  1,  1,  1,  1, 10,  1,  2,  8,  0,  0,  4,  1,  2,
         5,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([9, 6, 0, 1, 8, 3, 2, 3, 4, 6, 3, 1, 6, 1, 0, 1, 0, 8, 1, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 1, 3, 5, 4, 1, 3, 2, 0, 4, 0, 3, 0, 6, 2, 9, 2, 4, 8, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 8,  3,  2,  0,  2,  3,  2,  5, 10,  6,  2,  0,  1,  1, 10,  4,  8,  2,
         2,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  2,  0,  0, 12,  4,  4,  3,  3,  4,  9,  4,  0,  0,  9,  2,  1,  3,
         4,  0])
batch_text: torch.Size([20, 1000])
batch_

epoch:0,batch:579,lr:0.001,loss:1.9672,mean_loss:2.285,mean_f1:0.072:   6%|▍       | 579/10000 [00:14<03:56, 39.83it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  0,  1, 10,  2,  1,  1,  2,  3,  0,  0,  3,  3,  1,  2,  1, 11,  5,
         2,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  3,  0,  7,  0,  9,  3,  0, 11,  8,  2,  0,  1,  1,  6,  3,  0,  4,
         7,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 6,  2,  3,  5,  9,  0,  3,  0,  9,  7,  0,  3,  2,  2,  8,  4, 10,  5,
         0,  5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  2,  4,  3,  8,  6,  5,  2, 10,  3,  1,  1,  1,  0,  1,  0,  2,  5,
        10,  5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  3,  5, 10,  7,  3,  3,  0,  1,  3,  2,  3,  2,  5,  4,  2,  3,  2,
         0,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([11,  0,  1,  0,  2,  5,  7,  2,  0, 11,  4,  1,  3,  8,  2,  1,  0,  0,
         1,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 1, 2, 2, 3, 1, 2, 1, 5, 3, 1, 4, 3, 2, 4, 3, 3, 2, 0, 2])
batch_text: t

epoch:0,batch:587,lr:0.001,loss:2.5506,mean_loss:2.285,mean_f1:0.073:   6%|▍       | 583/10000 [00:14<04:01, 38.93it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  2,  1, 11,  0,  3,  1,  1,  1,  1,  1,  1,  0,  3,  0,  6,  4, 11,
         0,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 9,  2,  7,  1,  3,  0,  2,  2,  7,  3,  2,  2,  2,  1,  0,  2,  2, 10,
         8, 10])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 9,  2,  3,  7,  8,  1,  0,  8,  5,  1,  1,  4,  0,  2,  1,  1, 11,  2,
         4, 10])
batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 9, 3, 2, 0, 1, 2, 0, 5, 1, 8, 1, 5, 2, 0, 1, 4, 6, 0, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([6, 1, 4, 5, 2, 3, 6, 2, 9, 2, 0, 6, 9, 8, 3, 7, 2, 8, 0, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 1, 1, 2, 3, 1, 1, 3, 3, 1, 0, 1, 1, 0, 4, 2, 0, 8, 3, 3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 7,  5,  5,  2,  6,  3,  0,  9,  0,  4,  4,  4,  2, 11, 12,  1,  0, 10,
         3,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  4,  1,  

epoch:0,batch:595,lr:0.001,loss:2.3352,mean_loss:2.285,mean_f1:0.073:   6%|▍       | 593/10000 [00:15<03:52, 40.40it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 9,  5,  5,  6,  1,  7,  0,  0,  3,  0,  3,  5, 10,  0,  0,  2,  0,  1,
         2,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([7, 3, 2, 0, 2, 5, 2, 0, 4, 7, 5, 2, 2, 1, 8, 4, 0, 4, 4, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  0,  0,  6,  1,  0,  3,  0,  0, 10,  4,  0,  0,  6,  1,  0,  1,  0,
         4,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  8,  6,  9,  4,  8,  2,  7,  1,  1,  7,  4,  0,  2,  9,  2,  1,  0,
         2, 11])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  0,  5,  3,  0,  0,  2, 13, 11,  8,  3,  4,  3,  2,  0,  4,  4,  2,
         0,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 7,  2,  5,  3,  0,  3,  2,  0,  0,  2,  0,  3,  7,  0,  0,  0, 10,  0,
         3,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 6, 2, 3, 6, 6, 1, 0, 0, 5, 9, 6, 0, 5, 1, 0, 2, 1, 2, 6])
batch_text: torch.Size([20, 1000])
batch_

epoch:0,batch:603,lr:0.001,loss:2.1224,mean_loss:2.283,mean_f1:0.073:   6%|▍       | 603/10000 [00:15<03:50, 40.76it/s]

tensor([ 0,  2,  2,  2,  1,  6,  0,  3,  0,  1,  1,  3,  9,  7,  0,  1,  2, 10,
         8,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 1, 1, 0, 0, 2, 4, 2, 1, 6, 0, 0, 2, 9, 4, 0, 1, 1, 4, 6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([4, 2, 2, 5, 2, 3, 7, 6, 0, 6, 0, 1, 3, 5, 5, 2, 0, 4, 2, 3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([12,  3,  2,  2,  1,  7,  7,  0,  2,  1,  0,  2,  3,  1,  0,  1,  9,  0,
         1,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 1, 3, 4, 8, 4, 2, 2, 0, 6, 1, 9, 0, 2, 3, 6, 1, 1, 1, 9])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 1, 3, 1, 2, 4, 8, 0, 1, 0, 3, 1, 1, 4, 3, 5, 4, 5, 4, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 3, 0, 4, 8, 0, 1, 6, 5, 3, 2, 1, 6, 0, 7, 3, 1, 3, 1, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 0, 1, 5, 1, 3, 1, 2, 3, 1, 0, 0, 7, 3, 8, 2, 9, 4, 6, 0])


epoch:0,batch:610,lr:0.001,loss:2.0602,mean_loss:2.282,mean_f1:0.073:   6%|▍       | 608/10000 [00:15<03:50, 40.66it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  0,  7,  9,  0,  3,  5,  2,  2,  2,  1,  2,  2,  1,  1,  7, 10,  1,
        12,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 0, 1, 3, 0, 7, 3, 2, 0, 1, 6, 2, 1, 0, 3, 0, 2, 2, 0, 6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 6,  1,  1,  0,  2,  0,  1,  1,  7,  0,  1,  2, 10,  4,  1,  7,  2,  8,
         1,  5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0, 11,  0,  1,  5, 10,  0,  1,  1,  3,  1, 12,  9,  3,  2,  3,  0,  8,
         2,  9])
batch_text: torch.Size([20, 1000])
batch_label: tensor([12,  1, 10,  0,  6,  3,  1,  2,  8,  2,  5,  1,  2,  0,  3,  4,  0,  4,
         1,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 2, 9, 0, 9, 4, 9, 3, 8, 1, 0, 2, 6, 5, 2, 8, 4, 2, 3, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  3,  2,  0,  2,  0,  2,  4,  0,  2,  1, 11,  4,  0,  2,  0,  8,  0,
         2,  8])
batch_text: torch.Size([20, 1000])
batch_

epoch:0,batch:618,lr:0.001,loss:1.9599,mean_loss:2.28,mean_f1:0.074:   6%|▌        | 618/10000 [00:15<03:49, 40.80it/s]

tensor([0, 9, 8, 1, 7, 6, 3, 3, 8, 1, 1, 7, 1, 0, 0, 2, 0, 6, 1, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  0,  2,  1,  1,  3, 10,  0,  0,  2,  9,  2,  1,  4,  0, 11,  0,  1,
         2,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  1,  3,  4,  1,  1,  0,  1,  7,  4,  0,  5,  0, 10,  1,  0,  2,  1,
         4,  9])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 8,  0,  0,  2,  0,  2,  1,  1,  9,  2,  7,  2,  2,  2,  2,  1, 13,  1,
         3,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 7,  0,  3,  3,  0, 12,  0,  2,  7,  2,  3,  3,  3,  0,  1,  2,  1,  5,
         2,  5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 8,  4,  0,  3,  1,  1,  2,  1,  1, 10,  3,  0, 10,  0,  2,  6,  1,  2,
         3,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 1, 1, 0, 2, 0, 2, 0, 4, 3, 0, 0, 1, 3, 4, 4, 2, 1, 1, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 3, 2, 2, 2, 1, 0, 0, 3, 3, 0, 

epoch:0,batch:625,lr:0.001,loss:2.2303,mean_loss:2.279,mean_f1:0.074:   6%|▍       | 623/10000 [00:15<03:54, 39.99it/s]

tensor([8, 0, 3, 1, 1, 3, 9, 2, 2, 2, 6, 1, 6, 0, 3, 3, 2, 0, 1, 3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 0, 0, 5, 3, 2, 1, 0, 8, 3, 9, 4, 5, 1, 0, 1, 0, 4, 0, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  7,  1,  0,  4,  4,  3,  1,  2,  0,  1,  9,  8, 11, 12, 10,  4,  1,
         0,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  1,  2,  1,  5,  0,  2,  1,  4,  1,  2,  6,  6, 10,  2,  6,  2,  4,
         0,  6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 0, 2, 0, 0, 3, 2, 6, 9, 0, 3, 0, 0, 6, 1, 0, 0, 7, 1, 8])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  0,  0,  1, 10,  4,  1,  1,  5,  0,  0,  2,  6,  0, 11,  2,  8,  0,
         1,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 5, 7, 3, 0, 2, 4, 1, 6, 3, 3, 1, 5, 0, 1, 4, 1, 3, 7, 1])
batch_text: torch.Size([20, 1000])
batch_label: 

epoch:0,batch:633,lr:0.001,loss:2.4019,mean_loss:2.279,mean_f1:0.075:   6%|▌       | 633/10000 [00:16<03:49, 40.90it/s]

tensor([12,  2,  2, 10,  0,  6,  9,  0,  3,  3,  2,  6,  1,  0,  3,  1,  1,  3,
         8,  5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  0,  2,  5,  0, 11,  3,  3,  4,  1,  1,  0,  1,  0,  8,  8,  3,  1,
        10,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  2,  0,  2,  1,  6,  1,  2,  2,  7, 12,  0,  3,  0,  0,  1,  3,  3,
         4,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  9,  0,  3,  8,  3, 13,  5,  1,  2,  2,  2,  5,  5, 10,  0,  0,  1,
         4,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  3,  2,  3,  4,  0,  5,  0,  0,  5,  3,  0,  1,  1,  6,  6,  0, 10,
         1,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  3,  1,  0,  2,  2,  0,  7,  1,  6,  0,  0,  0,  0, 10,  0,  2,  1,
         0,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  4,  1,  6,  2, 10,  6,  9,  4,  1,  2, 10,  3,  0,  0,  1,  4,  1,
         0,  4])
batch_text: torch.Size([20, 1000]

epoch:0,batch:641,lr:0.001,loss:2.3062,mean_loss:2.279,mean_f1:0.075:   6%|▌       | 638/10000 [00:16<03:51, 40.46it/s]

tensor([ 1, 10,  7,  2,  3,  8,  2,  3,  0,  7,  3, 11,  0,  2,  0,  8,  9,  4,
         1, 10])
batch_text: torch.Size([20, 1000])
batch_label: tensor([6, 4, 0, 2, 1, 5, 0, 8, 0, 1, 1, 0, 3, 2, 2, 0, 3, 2, 3, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 3, 8, 6, 6, 3, 2, 1, 9, 3, 0, 6, 0, 2, 1, 3, 2, 2, 2, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  6,  7,  3,  3,  2,  3,  0,  7,  1,  2,  2,  4,  1,  3, 10, 10,  3,
         0,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([8, 9, 1, 0, 2, 2, 3, 5, 6, 0, 1, 2, 8, 6, 7, 0, 3, 6, 1, 4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([7, 1, 5, 2, 3, 2, 1, 4, 4, 9, 3, 2, 3, 4, 0, 8, 4, 2, 8, 9])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 9, 2, 6, 0, 2, 3, 3, 4, 3, 2, 5, 2, 2, 1, 8, 4, 4, 2, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  8, 10,  2,  4,  4,  7,  0,  4,  3,  1,  3,  3,  9,  1,  2,  2,  8,
         2,  7])


epoch:0,batch:648,lr:0.001,loss:2.6088,mean_loss:2.279,mean_f1:0.075:   6%|▌       | 648/10000 [00:16<03:51, 40.32it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([4, 0, 5, 1, 4, 2, 1, 1, 1, 3, 6, 4, 2, 7, 4, 6, 2, 1, 0, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 6, 0, 9, 1, 2, 4, 5, 0, 8, 9, 3, 0, 4, 0, 7, 7, 0, 0, 7])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  1,  2,  2,  0,  9,  9,  0,  1,  8, 12,  1,  5,  5,  3, 10,  0,  0,
         0,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3, 10,  1,  0,  4,  7,  0,  3,  3,  7,  1,  8,  2,  1,  0,  2,  2,  1,
         0,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 5, 12,  1,  9,  1,  1,  2,  2,  0,  0,  1,  8,  4,  3,  1,  2,  2,  4,
         2,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([10,  1,  0,  0,  3,  1,  1,  2,  0,  2,  2,  1,  5,  4,  4,  0,  0,  1,
         1,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 5,  7,  2,  6,  2,  8, 10, 10,  5,  3,  8,  6,  2,  3,  7,  2,  4,  9,
         0,  1])
batch_text: torch.Size([20, 1000])
batch_

epoch:0,batch:656,lr:0.001,loss:1.9038,mean_loss:2.278,mean_f1:0.075:   7%|▌       | 653/10000 [00:16<03:53, 40.11it/s]

tensor([ 2,  7,  5,  4,  1,  0, 11,  9,  4,  2,  2,  1,  8,  4,  0,  2,  0,  4,
         2,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  2,  0,  0,  6,  1, 10, 10,  4,  1,  5,  4,  1,  3,  5,  1, 10,  7,
         2,  9])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 7,  2,  5,  9,  2,  6,  0,  1,  0,  4,  3,  3,  1,  1, 11,  7,  1,  2,
         0,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  0,  1, 11,  1,  9,  0,  1,  1,  0,  0,  7,  2,  0,  2,  4,  1,  1,
         2,  7])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  1,  2, 10,  5,  1,  2,  1,  0,  1,  3,  0,  1,  5,  6,  3,  1,  0,
         0,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  3,  6,  8, 11,  1,  2,  2,  4,  0,  6,  2,  0,  0,  4,  0,  0,  8,
         5,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([8, 2, 7, 1, 4, 1, 0, 0, 3, 5, 4, 2, 9, 7, 9, 2, 0, 3, 3, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 4,

epoch:0,batch:664,lr:0.001,loss:1.9218,mean_loss:2.277,mean_f1:0.076:   7%|▌       | 663/10000 [00:16<03:48, 40.82it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([8, 6, 0, 0, 2, 0, 2, 0, 6, 7, 8, 2, 0, 2, 1, 2, 1, 0, 1, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  0,  2,  1,  1, 10,  0,  5,  0,  4,  2,  3,  5,  3,  0,  2,  5,  6,
         1,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  3,  1,  3, 11,  1,  5,  0,  2,  1,  7,  8,  1,  4,  6,  2,  4,  2,
         2,  6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 8,  0, 13,  3,  0,  1,  1,  2,  0,  4,  0,  5,  1,  2,  6,  0,  0,  2,
         1,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  2,  4,  8,  4,  9,  5,  7,  4,  1,  9,  2,  0,  3,  3,  0,  8,  1,
        11,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  1,  3,  9,  1,  1, 11,  2,  1,  0,  7,  6,  3,  0,  0,  8,  1,  1,
         1,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 7, 2, 1, 3, 0, 1, 0, 7, 4, 3, 2, 6, 1, 7, 1, 2, 1, 3, 3])
batch_text: torch.Size([20, 1000])
batch_

epoch:0,batch:672,lr:0.001,loss:2.4097,mean_loss:2.276,mean_f1:0.075:   7%|▌       | 673/10000 [00:16<03:47, 41.05it/s]

tensor([ 1,  1,  0,  0,  2,  1,  3,  0,  3,  0,  2,  1,  4,  3,  2, 11,  2,  2,
        10,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  1,  4,  2,  1,  0,  0,  9,  2,  1,  2,  1,  3,  7,  8,  0, 11,  3,
         2,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([11,  0,  5,  0,  0,  2,  2,  0,  0,  5,  6,  2,  9,  2,  7,  1,  4,  5,
         3,  6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 3, 2, 3, 0, 0, 1, 9, 0, 5, 4, 2, 5, 9, 5, 1, 9, 0, 2, 3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 8, 2, 3, 1, 0, 1, 3, 3, 3, 0, 0, 7, 0, 0, 3, 1, 2, 9, 4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1, 11,  3,  1,  1,  6, 10,  4,  0,  4,  3,  1,  5,  3,  1,  3,  3,  5,
         0,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 9, 0, 4, 3, 2, 3, 1, 2, 0, 2, 3, 3, 3, 4, 1, 4, 2, 2, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  5,  0,  0,  8,  8,  4,  2,  3,  2,  3,  1,  0, 10, 10,  

epoch:0,batch:680,lr:0.001,loss:2.1682,mean_loss:2.275,mean_f1:0.076:   7%|▌       | 678/10000 [00:17<03:48, 40.81it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  4, 10,  3,  0,  2,  5,  6,  2,  0,  0,  6,  1,  4, 10, 12,  2,  0,
         1,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  9,  1,  2, 11,  0,  0,  0,  9, 11,  2,  9,  2,  0, 11,  1,  2,  0,
         1,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 7, 5, 0, 4, 1, 4, 0, 3, 2, 0, 4, 2, 8, 8, 3, 0, 1, 2, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 2, 4, 6, 9, 2, 3, 2, 5, 7, 7, 9, 2, 5, 3, 1, 8, 2, 1, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  2,  4,  1,  1, 10,  2,  0,  1,  0,  2,  3,  5,  1,  1,  1,  2,  1,
         0,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([4, 5, 1, 0, 0, 0, 7, 5, 7, 3, 8, 1, 0, 0, 2, 4, 0, 9, 2, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  8,  0,  0,  7,  2, 11,  0, 11,  1,  0,  1,  5,  8,  8,  1,  2,  6,
         1,  6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  1,  1,  

epoch:0,batch:688,lr:0.001,loss:2.1281,mean_loss:2.274,mean_f1:0.076:   7%|▌       | 688/10000 [00:17<03:50, 40.38it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([5, 3, 4, 1, 1, 0, 0, 0, 2, 2, 2, 0, 1, 4, 1, 1, 5, 2, 0, 9])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  1,  6,  4,  7,  2,  3,  0,  8,  1, 11,  2,  4,  8,  3,  2,  2,  0,
         0,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 2, 4, 8, 5, 3, 3, 5, 2, 0, 4, 6, 1, 5, 6, 2, 4, 0, 4, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([6, 3, 0, 3, 5, 2, 0, 2, 7, 2, 3, 3, 6, 2, 0, 3, 9, 1, 1, 5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  2,  5,  5,  7,  1,  0,  3,  0, 11,  4,  7,  0,  4,  2, 10,  2,  9,
         1,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([4, 8, 2, 1, 2, 4, 7, 1, 0, 1, 2, 2, 9, 2, 1, 2, 1, 2, 0, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  1,  5,  2,  1,  5, 11,  1,  0,  0,  0,  1,  2, 11,  6,  3,  2,  4,
        10,  5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  0,  2,  7,  0, 10,  7,  2,  0,  0,  

epoch:0,batch:696,lr:0.001,loss:2.5226,mean_loss:2.273,mean_f1:0.076:   7%|▌       | 693/10000 [00:17<03:51, 40.17it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([5, 6, 7, 8, 1, 0, 1, 5, 2, 7, 1, 3, 9, 0, 1, 2, 2, 3, 7, 9])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 2, 3, 4, 9, 1, 0, 8, 2, 4, 4, 3, 0, 0, 3, 5, 0, 0, 1, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([9, 7, 1, 0, 0, 6, 2, 8, 3, 1, 0, 1, 6, 1, 0, 0, 0, 4, 1, 9])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1, 11,  0,  6,  3,  7,  3,  0,  2,  2,  2,  5,  7,  3,  1,  4,  3,  1,
         2,  9])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 2, 9, 1, 7, 1, 3, 6, 2, 4, 3, 2, 1, 0, 2, 5, 1, 0, 1, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  7,  7,  2, 11,  0,  2,  1,  4,  5,  2,  2,  0,  1,  0,  6,  2,  2,
         3,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  7,  4,  2,  3,  7, 11,  2,  0,  3,  3,  1,  1,  9,  1,  8,  1,  0,
         8,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  3, 10,  6, 10,  5, 10,  7,  2,  8,  

epoch:0,batch:704,lr:0.001,loss:2.2494,mean_loss:2.273,mean_f1:0.076:   7%|▌       | 702/10000 [00:17<03:54, 39.73it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  6,  1,  1,  0,  2,  4,  1,  0,  4,  9,  7,  3,  1,  3,  3,  2, 11,
         0,  5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  2,  3,  8,  9,  1,  0,  1,  2, 11,  1,  2,  4,  3, 10,  5,  4,  2,
         4,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([7, 1, 5, 2, 5, 4, 8, 7, 5, 8, 1, 1, 9, 0, 3, 7, 2, 4, 0, 6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  8,  0,  3,  0,  1,  7,  3,  0,  0,  2,  0,  1,  4,  2,  9, 10,  2,
         5,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  2, 12,  8,  5,  3,  1,  7,  3,  7,  7,  1,  5,  3,  0,  6,  1,  0,
         1,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 3, 5, 1, 1, 9, 0, 6, 1, 4, 8, 4, 7, 2, 0, 0, 4, 1, 1, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  0, 10,  0,  4,  8,  3,  4,  3,  1,  1,  4,  2,  0,  0,  2,  2,  2,
        11,  1])
batch_text: torch.Size([20, 1000])
batch_

epoch:0,batch:712,lr:0.001,loss:2.1187,mean_loss:2.273,mean_f1:0.077:   7%|▌       | 712/10000 [00:17<03:52, 40.03it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 3, 8, 3, 3, 8, 1, 0, 2, 9, 0, 1, 5, 1, 1, 7, 1, 0, 5, 6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 7, 3, 1, 0, 5, 2, 2, 2, 3, 8, 3, 0, 0, 0, 5, 0, 7, 2, 1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  5,  1,  1,  1,  9,  1,  4,  8,  2,  4,  5,  1,  3,  0,  1,  8, 11,
        10, 11])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 7,  0,  3,  2,  1,  0,  2,  3,  5,  8,  6,  0,  2,  3,  0, 10,  0,  2,
         0, 12])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  3,  6,  2, 11,  0,  0,  5,  9,  5,  3,  7,  2,  5,  6,  9,  4,  4,
         0,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 0, 9, 7, 2, 3, 0, 2, 0, 1, 0, 0, 4, 1, 7, 1, 1, 2, 1, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  3,  2,  5, 12,  4,  4,  2,  3,  2,  4,  2,  7,  3,  3,  4,  0,  5,
         2,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([8, 5, 2, 3, 2

epoch:0,batch:720,lr:0.001,loss:2.3326,mean_loss:2.273,mean_f1:0.077:   7%|▌       | 717/10000 [00:18<03:53, 39.76it/s]

tensor([ 5,  0,  3,  0,  2,  0, 10,  7,  1,  3,  1,  1,  3,  6,  9,  1,  0,  6,
         2,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  2,  1,  6,  2,  0,  0,  9,  3,  9, 12,  8, 11,  2,  4,  9,  1,  8,
         1,  4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 1, 3, 6, 0, 3, 1, 2, 2, 2, 9, 3, 6, 2, 8, 3, 5, 2, 1, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  2,  9,  0,  9,  3,  0,  1,  6,  0,  2,  3,  8,  7, 12,  4,  0,  0,
         2,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([6, 2, 6, 2, 1, 8, 2, 2, 6, 0, 0, 5, 3, 5, 3, 8, 4, 1, 2, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 6,  1,  2, 12,  6,  1, 10,  5,  5,  0,  1,  0,  0,  2,  5,  1,  1,  4,
         5, 13])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 3, 5, 0, 6, 2, 6, 3, 1, 4, 3, 2, 4, 7, 3, 1, 3, 0, 0, 8])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 8,  3,  6,  0,  6,  3,  3,  5,  0, 10,  1,  1,  3,  9,  3,  

epoch:0,batch:728,lr:0.001,loss:1.9942,mean_loss:2.272,mean_f1:0.077:   7%|▌       | 727/10000 [00:18<03:52, 39.94it/s]

batch_label: tensor([ 6, 10,  1,  9,  2,  9, 10,  3,  6,  1,  2,  1,  0,  3,  2,  9,  3,  3,
         0,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 6,  5,  6,  0,  4,  2,  0,  4,  2,  1,  1,  0,  0,  0,  2,  1,  1,  0,
        10,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  0,  6, 11,  0,  2,  1,  2,  1,  3,  4,  1,  3,  9,  0,  1, 11,  1,
         0,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  9, 10,  4,  0,  0,  1,  1,  1,  1, 10,  2,  2,  6,  6,  6,  0,  2,
         0,  9])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  3,  9,  5,  2,  1,  9,  4,  0, 13,  1,  1,  3, 10,  2,  0,  0,  0,
         2,  6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([5, 1, 2, 5, 2, 3, 2, 1, 0, 3, 3, 2, 0, 3, 1, 9, 3, 8, 2, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  1,  0,  0, 11,  1,  0,  5, 10,  3,  4,  2,  2,  5,  7,  8,  1,  2,
         3,  5])
batch_text: torch.Size([20, 1000])
batch_label: 

epoch:0,batch:736,lr:0.001,loss:2.1468,mean_loss:2.271,mean_f1:0.077:   7%|▌       | 732/10000 [00:18<03:51, 40.03it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 8, 0, 1, 7, 0, 7, 5, 3, 2, 4, 3, 2, 2, 0, 3, 3, 2, 3, 3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([1, 0, 8, 3, 1, 1, 0, 1, 0, 2, 1, 0, 0, 7, 0, 6, 3, 0, 0, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 9,  8,  6,  6,  2,  1,  3,  9, 12,  8,  3, 13,  7,  2,  2,  2,  2, 11,
         5,  8])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  3,  1,  0,  2,  1,  2,  0,  0, 12,  3,  7,  1,  0,  4,  1,  2,  3,
         5,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 6, 2, 6, 0, 2, 2, 2, 4, 1, 6, 0, 6, 2, 5, 1, 0, 4, 3, 6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([3, 0, 0, 4, 8, 6, 0, 0, 0, 4, 5, 1, 2, 3, 3, 0, 1, 8, 1, 5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  2,  2,  0,  7,  0,  0,  0,  7,  1, 10,  2,  3,  7,  2,  2,  7,  1,
         8,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([9, 0, 4, 3, 0, 4, 0, 2, 8, 0, 5, 4, 2, 3,

epoch:0,batch:744,lr:0.001,loss:2.1068,mean_loss:2.268,mean_f1:0.078:   7%|▌       | 742/10000 [00:18<03:54, 39.44it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([6, 0, 8, 0, 2, 1, 0, 4, 6, 5, 3, 2, 2, 1, 5, 0, 2, 4, 1, 7])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 2, 3, 0, 6, 6, 2, 4, 2, 2, 2, 6, 6, 3, 1, 5, 1, 9, 1, 9])
batch_text: torch.Size([20, 1000])
batch_label: tensor([2, 1, 1, 1, 5, 0, 1, 2, 1, 5, 5, 0, 2, 7, 3, 0, 0, 0, 0, 9])
batch_text: torch.Size([20, 1000])
batch_label: tensor([9, 0, 2, 7, 2, 0, 4, 1, 2, 0, 1, 8, 2, 3, 0, 9, 3, 2, 3, 3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 2, 2, 3, 2, 3, 0, 7, 1, 8, 1, 0, 7, 0, 5, 1, 1, 2, 3, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 6,  2,  1, 10,  0,  6,  7,  3,  8,  0,  5,  0,  3,  2,  1,  2,  1,  2,
         2,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 3, 1, 0, 9, 2, 2, 0, 3, 2, 2, 1, 2, 3, 0, 1, 0, 0, 2, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 8,  0,  0,  9,  0,  4,  0,  0, 10,  1,  1,  6,  3,  1,  1,  2,  9,  1,
         2,  0])


epoch:0,batch:752,lr:0.001,loss:2.1593,mean_loss:2.267,mean_f1:0.078:   8%|▌       | 751/10000 [00:18<03:54, 39.44it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 9,  5,  1,  4,  1,  0,  1,  0,  7,  2,  0,  0,  5, 11,  2,  8,  3,  2,
         2,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([0, 2, 8, 0, 0, 2, 4, 2, 2, 0, 2, 2, 5, 0, 0, 3, 3, 5, 0, 3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  1,  2,  0,  3,  0,  6,  3,  6,  0,  4,  2,  0,  3,  0, 10,  2,  5,
         5,  5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  5,  0,  1,  7,  2,  2,  7,  1,  6,  6,  2,  4,  6, 12,  1,  0, 11,
         2,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([6, 4, 4, 7, 5, 5, 8, 2, 1, 9, 2, 0, 1, 7, 2, 5, 3, 3, 2, 4])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  6,  8,  2,  0,  6, 10,  7,  1,  3,  0,  0,  9,  8,  7,  0,  6,  1,
         2,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 5,  5,  0,  3,  2,  7,  0,  4,  0,  3,  5,  2, 12,  7,  2,  5,  0,  0,
         0,  5])
batch_text: torch.Size([20, 1000])
batch_

epoch:0,batch:760,lr:0.001,loss:2.0621,mean_loss:2.268,mean_f1:0.078:   8%|▌       | 756/10000 [00:19<03:51, 39.88it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 5,  1, 12,  3,  8,  1,  6,  2,  1,  0,  2,  1,  1,  7,  8,  1,  4,  7,
        11,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  1,  1,  3,  0,  9,  3,  6,  2,  3,  1,  8,  5,  5, 11,  8,  0,  5,
         3,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 5,  0,  1,  8,  0,  0,  2,  0, 12,  4, 10,  1,  5,  8,  5,  7,  8,  8,
         2,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  2,  1,  0,  1,  2,  4,  0,  2,  0,  0,  1,  0,  0,  4,  2,  0, 10,
         1,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0, 10,  4,  9,  7,  4,  4,  1,  2,  1,  7,  3,  4,  6,  2,  1,  3,  1,
         3,  6])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  9,  6,  5,  0,  9,  0,  5,  9,  0,  3,  0,  2, 11,  3,  3,  1,  1,
         6,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  1,  1,  3, 13,  2,  2,  8,  5,  0,  5,  9,  0,  2,  8,  9,  3,  7,
  

epoch:0,batch:768,lr:0.001,loss:2.2133,mean_loss:2.268,mean_f1:0.078:   8%|▌       | 766/10000 [00:19<03:52, 39.78it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1, 10,  1,  1,  4,  2,  8,  1,  1,  2,  1,  8,  9,  2,  1,  0,  7, 13,
         6, 11])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  1,  5,  2,  0,  3,  8,  0, 11,  3,  1, 10,  3,  0,  5,  4,  1,  5,
         1,  7])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 5,  1,  3, 13,  1,  2,  8,  1,  5,  6,  5,  5,  2,  6,  2,  1, 12,  3,
         3,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 9,  6,  9,  6,  1, 12,  6,  2,  0,  2,  0,  5, 11,  5,  1,  8,  0,  2,
         7,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 4,  8,  5,  2,  5,  6,  1,  1,  4,  0,  0,  0,  2, 10,  5,  4,  5,  1,
         0,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 3,  2,  8,  1,  1,  2,  2,  1,  1,  3, 10,  6,  6,  1,  4,  9,  9,  1,
         1,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 1,  6,  1,  4,  8, 12,  3,  1,  3,  1,  5,  0,  2,  0,  0,  3,  5,  1,
  

epoch:0,batch:776,lr:0.001,loss:2.1546,mean_loss:2.267,mean_f1:0.079:   8%|▌       | 776/10000 [00:19<03:48, 40.34it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([8, 3, 1, 7, 3, 1, 4, 5, 3, 5, 1, 3, 0, 8, 4, 7, 1, 0, 7, 2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  0,  6,  1,  6,  2,  2,  0,  0,  2,  0,  1,  2,  1,  5,  8,  0,  0,
        12, 10])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 5, 10,  0,  5,  0,  7,  0,  5, 10,  7,  3,  1, 13,  2,  8,  2,  3,  3,
         0,  5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([5, 2, 1, 1, 2, 4, 4, 0, 0, 1, 6, 7, 7, 1, 1, 2, 4, 6, 3, 8])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 2,  3,  0,  0,  6, 11,  9,  3,  2,  1,  0,  4,  0,  1,  4,  3,  0,  5,
         1,  0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([7, 9, 6, 0, 2, 0, 0, 0, 5, 8, 7, 1, 1, 2, 1, 7, 0, 2, 2, 0])
batch_text: torch.Size([20, 1000])
batch_label: tensor([7, 0, 0, 9, 2, 4, 1, 2, 1, 1, 5, 5, 1, 1, 0, 5, 2, 9, 2, 9])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  2,  0,  7,  1,  2, 13,  3,  2,  0,  

epoch:0,batch:781,lr:0.001,loss:2.4512,mean_loss:2.268,mean_f1:0.079:   8%|▋       | 782/10000 [00:19<03:52, 39.59it/s]

batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0, 10,  2,  1,  2,  0,  7,  0,  8,  9,  3,  3,  7, 13,  1,  1,  8,  2,
         2,  1])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  1, 11,  9,  0,  4,  4,  3,  6,  0, 10,  1,  4, 10,  3,  1,  3, 10,
         5,  2])
batch_text: torch.Size([20, 1000])
batch_label: tensor([ 0,  0,  8,  1, 11,  2,  1,  1,  0,  0, 11,  2,  0,  0,  1,  0,  1,  3,
         3,  3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([11,  5,  4,  3,  2,  3,  6,  2,  1,  2,  0,  3,  3,  0,  2,  1,  1,  1,
         0,  5])
batch_text: torch.Size([20, 1000])
batch_label: tensor([8, 8, 8, 4, 4, 0, 4, 4, 3, 4, 3, 4, 5, 8, 7, 1, 1, 1, 4, 3])
batch_text: torch.Size([20, 1000])
batch_label: tensor([5, 4, 3, 1, 0, 1, 6, 6, 2, 1, 1, 3, 3, 2, 5, 2, 0, 0, 2, 0])





RuntimeError: unique_by_key: failed on 2nd step: cudaErrorAssert: device-side assert triggered

　　上面报了个错，查了之后发现网上说很有可能是使用ｃｒｏｓｓＥｎｔｒｏｐｙＬｏｓｓ的时候，输入数据的类别数目跟实际的类别数目不对照。首先检查了一下ｌａｂｅｌ的最大最小值以及每个数据的ｌａｂｅｌ的长度，没有发现问题。后来有一个说法是说可能跟稀疏矩阵有关系，突然意识到，之前ｔｏｋｅｎ的最大编号是７５４９，但是我把每个ｔｏｋｅｎ增加了１，然后把０空出来作为新增的空格，但是在ｅｍｂｅｄｄｉｎｇ层中，我设置的形状是７５５０，７５５０指的应该是ｔｏｋｅｎ的最大范围，在我增加０之后，这个范围从０－７５４９变成了０－７５５０，现在ｅｍｂｅｄｄｉｎｇ层的形状应该是７５５１才能正常使用，把这个改过来之后再尝试。

In [5]:
import torch
import torch.nn as nn
from torch.utils.data.dataset import Dataset
from torch.utils.data import DataLoader
import numpy as np
import pandas as pd
from tqdm import tqdm
import os

class MyDataset(Dataset):
    def __init__(self,csv_path):
        csv_data = pd.read_csv(csv_path,sep='\t')    
        self.text_data = csv_data.text
        self.label_data = csv_data.label
        
    def __getitem__(self,index):
        #所有text内的token索引增加1，0空出来代表空格，将每个text控制在1000长
        text_str = self.text_data[index]
        text_list = [int(x)+1 for x in text_str.split()]
        if len(text_list)>=1000:
            text_list = text_list[:1000]
        else:
            text_list.extend([0]*(1000-len(text_list)))
        text_array = np.array(text_list)
        label_array = np.array(self.label_data[index])
        return text_array,label_array
        
    def __len__(self):
        return len(self.text_data)

class MyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.embedding = nn.Embedding(7551,100)
        self.rnn = nn.GRU(100,50,batch_first=True)
        self.fc = nn.Linear(50,14)
        
    def forward(self,X):
#         print('X:',X.shape)
        X = self.embedding(X)
#         print('embedding:',X.shape)
        _,X = self.rnn(X)
#         print('rnn:',X.shape)
        X = X.squeeze(dim=0)
#         print('squeeze:',X.shape)
        y_hat = self.fc(X)
#         print('y_hat:',y_hat.shape)
        return y_hat

    
class MyTrain():
    def __init__(self,max_epoch=1,random_seed=1,lr=0.001,out_dir='./'):
        self.max_epoch = max_epoch
        self.random_seed = random_seed
        self.lr = lr
        self.out_dir = out_dir
        self.iter = 0
        
    def fix_random(self):
        import random
        import numpy as np
        import torch
        random.seed(self.random_seed)
        np.random.seed(self.random_seed)
        torch.random.manual_seed(self.random_seed)
        torch.cuda.random.manual_seed_all(self.random_seed)
        torch.backends.cudnn.deterministic = True
        print(f'random seed:{self.random_seed}')
        
    def my_train(self):
        
        max_epoch,lr = self.max_epoch,self.lr
        if self.random_seed is not None:
            self.fix_random()
        my_dataset = MyDataset('./train_set.csv')
        my_model = MyModel()
        my_model.train()          #将模型设置为训练模式
        my_optim = torch.optim.Adam(my_model.parameters(),lr=lr)
        my_loss = nn.CrossEntropyLoss()
        if torch.cuda.is_available():
            my_model.cuda()
            my_loss.cuda()
        print(f'train device:{next(iter(my_model.parameters())).device}')  #显示训练设备
        
        for epoch_index in range(max_epoch):
            loss_list = []
            f1_score_list = []
            best_f1_score = 0
            my_dataloader = DataLoader(my_dataset,batch_size=100,shuffle=True)
            my_dataloader = tqdm(my_dataloader)
            for batch_index,batch_data in enumerate(my_dataloader):
                batch_text,batch_label = batch_data
#                 print('batch_text:',batch_text.shape)
#                 print('batch_label:',batch_label)
                if torch.cuda.is_available():
                    batch_text = batch_text.cuda()
                    batch_label = batch_label.cuda()
                    
                y_hat = my_model(batch_text)
                batch_loss = my_loss(y_hat,batch_label)
                
                my_optim.zero_grad()
                batch_loss.backward()
#                 for i in my_model.parameters():
#                     print('grad:',i.grad)
                my_optim.step()
                my_optim.param_groups[0]['lr'] = lr*(0.8**(epoch_index%10))
                
                #显示batch结果
                batch_lr = round(my_optim.param_groups[0]['lr'],5)
                batch_loss = round(batch_loss.item(),4)
                loss_list.append(batch_loss)
                mean_loss = round((sum(loss_list)/len(loss_list)),3)

                batch_f1_score = self.f1_score(y_hat.data,batch_label.data)
                f1_score_list.append(batch_f1_score)
                mean_f1 = round(sum(f1_score_list)/len(f1_score_list),3)
                my_dataloader.set_description(f'epoch:{epoch_index},batch:{batch_index},lr:{batch_lr},loss:{batch_loss},mean_loss:{mean_loss},mean_f1:{mean_f1}')
                
                #存储模型
                torch.save(my_model.state_dict(),os.path.join(self.out_dir,'embedding_gru_last'))                
                if batch_f1_score>best_f1_score:
                    torch.save(my_model.state_dict(),os.path.join(self.out_dir,'embedding_gru_best'))
                    best_f1_score = batch_f1_score
                    
                self.iter += 1
                
    def f1_score(self,y_hat,label,eps=1e-8):
        #y_hat(N,C),label(1)
        y_hat = y_hat.cpu()
        label = label.cpu()
        preds_list = list(torch.argmax(y_hat,dim=1).numpy())
        label_list = list(label.numpy())
#         print(f'preds:{preds_list},label:{label_list}')
        class_index_list = []
        for class_index in label_list:
            if class_index not in class_index_list:
                class_index_list.append(class_index)

        f1_score_list = []
        for index in class_index_list:
            if index not in preds_list:
                sub_f1_score = 0
            else:
                tp = 0
                fp = 0
                fn = 0
                for i in range(len(preds_list)):
                    if preds_list[i] == index and label_list[i] == index:
                        tp+=1
                    if preds_list[i] == index and label_list[i] != index: 
                        fp+=1
                    if preds_list[i] != index and label_list[i] == index:
                        fn+=1
                prec_val = tp/(tp+fp) 
                recall_val = tp/(tp+fn)
                sub_f1_score = 2*(prec_val*recall_val)/(prec_val+recall_val+eps)
            f1_score_list.append(sub_f1_score)

        batch_f1_score = sum(f1_score_list)/len(f1_score_list)

        return batch_f1_score
        
        
        
#     #验证所有的验证集数据
#     def my_valid(self,valid_dataloader,model,criterion):

#         #将模型设置为计算模型
#         model.eval()

#         total_loss = 0
#         with torch.no_grad():
#             for batch_index,valid_data in enumerate(valid_dataloader):
#                 batch_feat,batch_label = valid_data
#                 y_hat = model(batch_feat)
#                 loss = criterion(y_hat,batch_label)
#                 total_loss+=loss
#             total_loss/=(batch_index+1)

#         return total_loss

In [8]:
if __name__ == '__main__':
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    MyTrain(max_epoch=100,random_seed=1).my_train()

random seed:1
train device:cuda:0


epoch:0,batch:1999,lr:0.001,loss:0.3871,mean_loss:1.061,mean_f1:0.463: 100%|███████| 2000/2000 [02:01<00:00, 16.47it/s]
epoch:1,batch:1999,lr:0.0008,loss:0.2512,mean_loss:0.306,mean_f1:0.851: 100%|██████| 2000/2000 [02:01<00:00, 16.49it/s]
epoch:2,batch:1999,lr:0.00064,loss:0.1788,mean_loss:0.229,mean_f1:0.899: 100%|█████| 2000/2000 [02:00<00:00, 16.54it/s]
epoch:3,batch:1999,lr:0.00051,loss:0.0831,mean_loss:0.195,mean_f1:0.916: 100%|█████| 2000/2000 [02:01<00:00, 16.49it/s]
epoch:4,batch:1999,lr:0.00041,loss:0.1926,mean_loss:0.172,mean_f1:0.927: 100%|█████| 2000/2000 [02:01<00:00, 16.48it/s]
epoch:5,batch:1999,lr:0.00033,loss:0.1434,mean_loss:0.157,mean_f1:0.935: 100%|█████| 2000/2000 [02:01<00:00, 16.48it/s]
epoch:6,batch:1999,lr:0.00026,loss:0.1661,mean_loss:0.145,mean_f1:0.94: 100%|██████| 2000/2000 [02:01<00:00, 16.49it/s]
epoch:7,batch:1999,lr:0.00021,loss:0.1038,mean_loss:0.136,mean_f1:0.944: 100%|█████| 2000/2000 [02:01<00:00, 16.46it/s]
epoch:8,batch:1999,lr:0.00017,loss:0.147