In [1]:
from distutils.command.config import config
import numpy as np
import pandas as pd
import torch
from torch import nn
from torch.optim import AdamW
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertModel, BertConfig, get_linear_schedule_with_warmup
from tqdm import tqdm
import os
import time
from transformers import BertTokenizer
from transformers import logging
import processing
from sklearn import metrics
import warnings
import time
import sys
sys.path.append("D:/Experiment")
import MyModel
from MyKu import processing

# Only show transformers log messages at error level.
logging.set_verbosity_error()

# Restrict this process to the first GPU. This is set before any CUDA call below,
# which is what makes it take effect.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# Device every model and batch in this notebook is moved to; falls back to CPU
# when CUDA is unavailable.
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(DEVICE)

cuda:0
cuda:0


In [2]:
# Load the OLID train split and the OLID test-A split through the project helper module.
# NOTE(review): both are later iterated as (sentence, label) pairs by
# preprocessing_for_bert — confirm against the `processing` module.
train_data = processing.get_OLID_train_data()
test_data = processing.get_OLID_testA_data()
# Alternative datasets (disabled):
# train_data, test_data = processing.load_hasoc2020()
# train_data = processing.get_SEM_data(processing.SEM2018_DATASET+'/train.tsv')
# test_data = processing.get_SEM_data(processing.SEM2018_DATASET+'/test.tsv')

In [3]:
# Checkpoint of the previously trained word-detection model.
path = "D:/Experiment_models_save/OLID/words_detect_model.pth"

# Auxiliary model whose outputs are concatenated into the classifier's features
# (see MyBertModel.forward). It is only used for inference in this notebook.
wordModel = MyModel.MyBertModel(60, 2)
wordModel.to(DEVICE)
# map_location remaps the stored tensors onto DEVICE, so a checkpoint saved on a
# GPU can still be loaded when this notebook falls back to CPU.
wordModel.load_state_dict(torch.load(path, map_location=DEVICE))
wordModel.eval()

MyBertModel(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)


In [4]:
# Fixed token length every sentence is padded/truncated to by the tokenizer;
# it also sizes the decoder layers of MyBertModel.
MAX_LEN = 60

def save_pretrained(model, path):
    """Write ``model`` to ``<path>/model.pth``.

    The target directory is created first (no error if it already exists), then
    the whole object is serialized with ``torch.save``.

    Args:
        model: Object to serialize.
        path: Directory that will contain ``model.pth``.
    """
    os.makedirs(path, exist_ok=True)
    target_file = os.path.join(path, 'model.pth')
    torch.save(model, target_file)

# Name of the pretrained checkpoint the tokenizer is loaded from.
pretrained_model_name = 'bert-base-uncased'
# Shared tokenizer used by preprocessing_for_bert; lower-casing is requested
# explicitly via do_lower_case.
tokenizer = BertTokenizer.from_pretrained(
    pretrained_model_name, do_lower_case=True)

In [5]:
def preprocessing_for_bert(data):
    """Tokenize (sentence, label) pairs into fixed-length integer tensors.

    Every sentence is encoded with the module-level ``tokenizer``, with special
    tokens added, and padded/truncated to ``MAX_LEN`` tokens.

    Args:
        data: Iterable of ``(sentence, label)`` pairs.

    Returns:
        Tuple ``(input_ids, attention_masks, labels)``. The first two have shape
        ``(num_sentences, MAX_LEN)``; ``labels`` has shape ``(num_sentences,)``.
        All three are ``torch.long`` tensors.
    """
    input_ids, attention_masks, labels = [], [], []
    for sent, label in data:
        encoded_sent = tokenizer.encode_plus(
            text=sent,
            add_special_tokens=True,
            max_length=MAX_LEN,
            padding='max_length',
            truncation=True,
            return_attention_mask=True,
            )
        input_ids.append(encoded_sent.get('input_ids'))
        attention_masks.append(encoded_sent.get('attention_mask'))
        labels.append(label)
    # torch.Tensor(...) would silently build float32 tensors; token ids, masks and
    # class labels are integers, so create them with an explicit integer dtype.
    input_ids = torch.tensor(input_ids, dtype=torch.long)
    attention_masks = torch.tensor(attention_masks, dtype=torch.long)
    labels = torch.tensor(labels, dtype=torch.long)
    return input_ids, attention_masks, labels

# Tokenize both splits once, up front.
train_inputs, train_masks, train_labels = preprocessing_for_bert(train_data)
test_inputs, test_masks, test_labels = preprocessing_for_bert(test_data)

# Each dataset item is an (input_ids, attention_mask, label) triple.
train_dataset = TensorDataset(train_inputs, train_masks, train_labels)
test_dataset = TensorDataset(test_inputs, test_masks, test_labels)
# Training batches are drawn in random order; test batches keep dataset order.
train_sampler = RandomSampler(train_dataset)
test_sampler = SequentialSampler(test_dataset)

train_iter = DataLoader(train_dataset, sampler=train_sampler, batch_size=32)
test_iter = DataLoader(test_dataset, sampler=test_sampler, batch_size=32)

In [6]:
class MyBertModel(nn.Module):
    """BERT + BiLSTM sentence classifier fused with scores from the global ``wordModel``.

    The forward pass combines three signals:
      * a per-token score obtained by projecting BERT's hidden states through two
        learned square matrices and max-pooling over the feature axis,
      * a ``MAX_LEN``-wide vector decoded from the first and last BiLSTM outputs,
      * the output of the module-level ``wordModel`` for the same batch.

    Relies on the module-level names ``MAX_LEN`` and ``wordModel``.

    Args:
        class_size: Number of output classes.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout between LSTM layers (only meaningful when
            ``num_layers > 1``).
        pretrained_name: Hugging Face checkpoint name for the BERT encoder.
    """

    def __init__(self, class_size, num_layers, dropout, pretrained_name='bert-base-uncased'):
        super(MyBertModel, self).__init__()
        config = BertConfig.from_pretrained(pretrained_name)
        config.output_attentions = True
        config.return_dict = True
        self.bert = BertModel.from_pretrained(pretrained_name, config=config)
        # Read the encoder width from the config instead of hard-coding 768 so other
        # checkpoints work too (768 for 'bert-base-uncased').
        hidden_size = config.hidden_size
        # nn.LSTM applies dropout only between stacked layers; passing a non-zero
        # value with a single layer has no effect and just raises a warning.
        lstm_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(hidden_size, 100, num_layers=num_layers,
                            bidirectional=True, dropout=lstm_dropout, batch_first=True)
        # torch.empty allocates without initialising; both matrices are filled by
        # the uniform_ calls at the end of __init__.
        self.weight_W = nn.Parameter(torch.empty(hidden_size, hidden_size))
        self.weight_proj = nn.Parameter(torch.empty(hidden_size, hidden_size))
        # 100 * 4: first and last timestep of a bidirectional LSTM with 100 units.
        self.decoder1 = nn.Linear(100 * 4, MAX_LEN)
        self.decoder2 = nn.Linear(2 * MAX_LEN, class_size)
        # decoder3 and relu are not used in forward(); they are kept so the
        # module's parameter set (state_dict keys) stays unchanged.
        self.decoder3 = nn.Linear(MAX_LEN, class_size)
        self.relu = nn.ReLU()
        nn.init.uniform_(self.weight_W, -0.1, 0.1)
        nn.init.uniform_(self.weight_proj, -0.1, 0.1)

    def forward(self, input_ids, attention_mask):
        """Compute class logits for a batch.

        Args:
            input_ids: Token ids, shape ``(batch, MAX_LEN)``.
            attention_mask: Attention mask with the same shape as ``input_ids``.

        Returns:
            Logits of shape ``(batch, class_size)``.
        """
        outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
        # wordModel is a fixed, pre-trained helper that is never handed to the
        # optimizer, so skip building an autograd graph through it.
        # Its output must have MAX_LEN features per example for decoder2 below.
        with torch.no_grad():
            wordsScore = wordModel(input_ids, attention_mask)
        last_hidden_state = outputs[0]  # (batch, seq_len, hidden)
        w = torch.tanh(torch.matmul(last_hidden_state, self.weight_W))  # (batch, seq_len, hidden)
        self_matching = torch.matmul(w, self.weight_proj)  # (batch, seq_len, hidden)
        # Max over the feature axis leaves one score per token.
        att_score, _ = torch.max(self_matching, dim=2)  # (batch, seq_len)
        self.lstm.flatten_parameters()
        output_hidden, _ = self.lstm(last_hidden_state)  # (batch, seq_len, 200)
        # Concatenate the BiLSTM outputs at the first and last positions.
        output = torch.cat((output_hidden[:, 0, :], output_hidden[:, -1, :]), dim=1)  # (batch, 400)
        output = self.decoder1(output)  # (batch, MAX_LEN)
        # Element-wise gating; requires seq_len == MAX_LEN.
        self_matching_out = att_score.mul(output)  # (batch, MAX_LEN)
        f = torch.cat((self_matching_out, wordsScore), dim=-1)  # (batch, 2 * MAX_LEN)
        outs = self.decoder2(f)
        return outs

In [7]:
def initialize_model(epoch=10):
    """Build a fresh classifier with its optimizer and learning-rate schedule.

    A new ``MyBertModel(2, 1, 0.5)`` is created and moved to ``DEVICE``. It is
    optimized with AdamW (lr 2e-5, eps 1e-8) under a linear schedule without
    warm-up that spans ``len(train_iter) * epoch`` optimizer steps.

    Args:
        epoch: Number of epochs the schedule is sized for.

    Returns:
        Tuple ``(model, optimizer, scheduler)``.
    """
    classifier = MyBertModel(2, 1, 0.5)
    classifier.to(DEVICE)

    adamw = AdamW(classifier.parameters(), lr=2e-5, eps=1e-8)

    # One scheduler step is taken per training batch.
    num_training_steps = len(train_iter) * epoch
    lr_schedule = get_linear_schedule_with_warmup(
        adamw,
        num_warmup_steps=0,
        num_training_steps=num_training_steps,
    )
    return classifier, adamw, lr_schedule

# Shared loss for train() and evaluate(); with default arguments it averages the
# cross-entropy over the examples in a batch.
loss_fn = nn.CrossEntropyLoss()

In [8]:
def train(model, train_iter, test_iter, optimizer, scheduler, epochs=10, evaluation=None):
    """Train ``model`` and print a progress table, optionally evaluating each epoch.

    For every batch the loss from the module-level ``loss_fn`` is back-propagated,
    gradients are clipped to a norm of 1.0, and both ``optimizer`` and
    ``scheduler`` are stepped. A progress row is printed every 40 steps and at
    the last step of each epoch.

    Args:
        model: Module called as ``model(input_ids, attention_mask)``.
        train_iter: Iterable of ``(input_ids, attention_mask, labels)`` batches;
            must support ``len()``.
        test_iter: Batches passed to ``evaluate`` when ``evaluation`` is truthy.
        optimizer: Optimizer stepped once per batch.
        scheduler: Learning-rate scheduler stepped once per batch.
        epochs: Number of passes over ``train_iter``.
        evaluation: When truthy, ``evaluate`` runs after every epoch and its
            loss, accuracy and macro-F1 are printed.
    """
    for num_epoch in range(epochs):
        # The header includes an F1 column so it lines up with the evaluation
        # row below, which prints loss, accuracy AND F1.
        print(f"{'Epoch':^7} | {'每40个Batch':^9} | {'训练集 Loss':^12} | {'测试集 Loss':^10} | {'测试集准确率':^9} | {'F1':^9} | {'时间':^9}")
        print("-" * 80)
        t0_epoch, t0_batch = time.time(), time.time()
        # total_loss covers the whole epoch; batch_loss/batch_counts cover only
        # the window since the last printed row.
        total_loss, batch_loss, batch_counts = 0, 0, 0
        model.train()
        for step, batch in enumerate(train_iter):
            batch_counts += 1
            b_input_ids, b_att_masks, b_labels = tuple(t.to(DEVICE, dtype=torch.int32) for t in batch)
            model.zero_grad()
            output = model(b_input_ids, b_att_masks)
            loss = loss_fn(output, b_labels.long())
            batch_loss += loss.item()
            total_loss += loss.item()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()
            if (step % 40 == 0 and step != 0) or (step == len(train_iter) - 1):
                time_elapsed = time.time() - t0_batch
                print(f"{num_epoch + 1:^7} | {step:^10}  | {batch_loss / batch_counts:^14.6f}  | {'-':^12} | {'-':^13} | {'-':^12} |  {time_elapsed:^9.2f}")
                batch_loss, batch_counts = 0, 0
                t0_batch = time.time()

        avg_train_loss = total_loss / len(train_iter)
        print('-' * 80)

        if evaluation:
            test_loss, test_accuracy, f1_score = evaluate(model, test_iter)
            time_elapsed = time.time() - t0_epoch
            print(f"{num_epoch + 1:^7} | {'-':^10} | {avg_train_loss:^14.6f} | {test_loss:^12.6f} | {test_accuracy:^12.2f} | {f1_score:^12.2f} | {time_elapsed:^9.2f}")
            print("-" * 80)
        print("\n")

def evaluate(model, test_dataloader):
    """Evaluate ``model`` on a dataloader and print the confusion matrix.

    Loss and accuracy are averaged over individual examples, not over batches,
    so a smaller final batch does not get extra weight.

    Args:
        model: Module called as ``model(input_ids, attention_mask)``; it is
            switched to eval mode.
        test_dataloader: Iterable of ``(input_ids, attention_mask, labels)``
            batches.

    Returns:
        Tuple ``(val_loss, val_accuracy, f1_score)``: mean loss per example,
        accuracy in percent, and macro-averaged F1 in percent. Loss and accuracy
        are ``nan`` when the dataloader yields no examples.
    """
    model.eval()
    loss_sum, num_correct, num_samples = 0.0, 0, 0
    pred_y, true_y = [], []
    for batch in test_dataloader:
        # Move the batch to the target device.
        b_input_ids, b_attn_mask, b_labels = tuple(
            t.to(DEVICE, dtype=torch.int32) for t in batch)
        # Inference only: neither the forward pass nor the loss needs gradients.
        with torch.no_grad():
            output = model(b_input_ids, b_attn_mask)
            loss = loss_fn(output, b_labels.long())
        batch_size = b_labels.size(0)
        # loss_fn returns the batch mean, so scale it back to a per-batch sum
        # before accumulating.
        loss_sum += loss.item() * batch_size
        num_samples += batch_size
        # Predicted class = index of the largest logit in each row.
        pred = torch.argmax(output, dim=1).flatten()
        num_correct += (pred == b_labels).sum().item()
        pred_y.extend(pred.tolist())
        true_y.extend(b_labels.tolist())
    print(metrics.confusion_matrix(true_y, pred_y))
    if num_samples:
        val_loss = loss_sum / num_samples
        val_accuracy = 100.0 * num_correct / num_samples
    else:
        val_loss = val_accuracy = float("nan")
    f1_score = metrics.f1_score(true_y, pred_y, average="macro") * 100

    return val_loss, val_accuracy, f1_score


In [9]:
# Number of independent train/evaluate repetitions, each starting from a fresh model.
NUM_RUNS = 100
# Epochs per repetition. The same value must size the LR schedule
# (initialize_model) and drive the training loop (train), so it is defined once.
EPOCHS_PER_RUN = 2

for i in range(NUM_RUNS):
    bert_model, optimizer, scheduler = initialize_model(EPOCHS_PER_RUN)
    print("Start training and testing:\n")
    train(bert_model, train_iter,
          test_iter, optimizer, scheduler, epochs=EPOCHS_PER_RUN, evaluation=True)



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.596778     |      -       |       -       |    24.57  
   1    |     80      |    0.476742     |      -       |       -       |    22.85  
   1    |    120      |    0.467265     |      -       |       -       |    22.75  
   1    |    160      |    0.455175     |      -       |       -       |    22.72  
   1    |    200      |    0.431195     |      -       |       -       |    22.37  
   1    |    240      |    0.425460     |      -       |       -       |    22.43  
   1    |    280      |    0.422872     |      -       |       -       |    22.42  
   1    |    320      |    0.421207     |      -       |       -       |    22.43  
   1    |    360      |    0.454797     |      -       |       -       |    22.43  
   1    |    400      |    0.426540     |      -       |      



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.634468     |      -       |       -       |    22.98  
   1    |     80      |    0.520283     |      -       |       -       |    22.46  
   1    |    120      |    0.484785     |      -       |       -       |    22.49  
   1    |    160      |    0.455214     |      -       |       -       |    22.52  
   1    |    200      |    0.435274     |      -       |       -       |    22.50  
   1    |    240      |    0.460964     |      -       |       -       |    22.52  
   1    |    280      |    0.472750     |      -       |       -       |    22.52  
   1    |    320      |    0.434793     |      -       |       -       |    22.51  
   1    |    360      |    0.448576     |      -       |       -       |    22.50  
   1    |    400      |    0.442830     |      -       |      



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.588430     |      -       |       -       |    23.00  
   1    |     80      |    0.485608     |      -       |       -       |    22.42  
   1    |    120      |    0.459566     |      -       |       -       |    22.40  
   1    |    160      |    0.433833     |      -       |       -       |    22.43  
   1    |    200      |    0.427402     |      -       |       -       |    22.43  
   1    |    240      |    0.411542     |      -       |       -       |    22.44  
   1    |    280      |    0.493283     |      -       |       -       |    22.44  
   1    |    320      |    0.439895     |      -       |       -       |    22.44  
   1    |    360      |    0.437505     |      -       |       -       |    22.43  
   1    |    400      |    0.409556     |      -       |      



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.579197     |      -       |       -       |    23.11  
   1    |     80      |    0.475106     |      -       |       -       |    22.52  
   1    |    120      |    0.412744     |      -       |       -       |    22.52  
   1    |    160      |    0.422035     |      -       |       -       |    22.52  
   1    |    200      |    0.433056     |      -       |       -       |    22.51  
   1    |    240      |    0.439915     |      -       |       -       |    22.53  
   1    |    280      |    0.431979     |      -       |       -       |    22.52  
   1    |    320      |    0.447604     |      -       |       -       |    22.51  
   1    |    360      |    0.424275     |      -       |       -       |    22.52  
   1    |    400      |    0.441459     |      -       |      



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.664747     |      -       |       -       |    22.90  
   1    |     80      |    0.529761     |      -       |       -       |    22.40  
   1    |    120      |    0.533830     |      -       |       -       |    22.41  
   1    |    160      |    0.501221     |      -       |       -       |    22.43  
   1    |    200      |    0.441805     |      -       |       -       |    22.42  
   1    |    240      |    0.480174     |      -       |       -       |    22.43  
   1    |    280      |    0.458393     |      -       |       -       |    22.43  
   1    |    320      |    0.456699     |      -       |       -       |    22.46  
   1    |    360      |    0.435595     |      -       |       -       |    22.51  
   1    |    400      |    0.473639     |      -       |      



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.636712     |      -       |       -       |    23.00  
   1    |     80      |    0.533802     |      -       |       -       |    22.47  
   1    |    120      |    0.455250     |      -       |       -       |    22.54  
   1    |    160      |    0.472484     |      -       |       -       |    22.53  
   1    |    200      |    0.480608     |      -       |       -       |    22.55  
   1    |    240      |    0.464991     |      -       |       -       |    22.53  
   1    |    280      |    0.436687     |      -       |       -       |    22.54  
   1    |    320      |    0.446908     |      -       |       -       |    22.52  
   1    |    360      |    0.460821     |      -       |       -       |    22.53  
   1    |    400      |    0.434070     |      -       |      



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.595912     |      -       |       -       |    22.89  
   1    |     80      |    0.509131     |      -       |       -       |    22.38  
   1    |    120      |    0.464177     |      -       |       -       |    22.42  
   1    |    160      |    0.441892     |      -       |       -       |    22.47  
   1    |    200      |    0.432243     |      -       |       -       |    22.55  
   1    |    240      |    0.415012     |      -       |       -       |    22.54  
   1    |    280      |    0.473063     |      -       |       -       |    22.54  
   1    |    320      |    0.428462     |      -       |       -       |    22.54  
   1    |    360      |    0.417910     |      -       |       -       |    22.55  
   1    |    400      |    0.417696     |      -       |      



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.594821     |      -       |       -       |    22.97  
   1    |     80      |    0.505990     |      -       |       -       |    22.47  
   1    |    120      |    0.479286     |      -       |       -       |    22.51  
   1    |    160      |    0.455124     |      -       |       -       |    22.51  
   1    |    200      |    0.439277     |      -       |       -       |    22.53  
   1    |    240      |    0.420441     |      -       |       -       |    22.50  
   1    |    280      |    0.451582     |      -       |       -       |    22.52  
   1    |    320      |    0.444727     |      -       |       -       |    22.50  
   1    |    360      |    0.413129     |      -       |       -       |    22.48  
   1    |    400      |    0.423925     |      -       |      



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.603981     |      -       |       -       |    23.05  
   1    |     80      |    0.509906     |      -       |       -       |    22.48  
   1    |    120      |    0.462494     |      -       |       -       |    22.54  
   1    |    160      |    0.463928     |      -       |       -       |    22.50  
   1    |    200      |    0.463176     |      -       |       -       |    22.51  
   1    |    240      |    0.440740     |      -       |       -       |    22.52  
   1    |    280      |    0.428431     |      -       |       -       |    22.52  
   1    |    320      |    0.414085     |      -       |       -       |    22.51  
   1    |    360      |    0.436109     |      -       |       -       |    22.52  
   1    |    400      |    0.423729     |      -       |      



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.697650     |      -       |       -       |    23.13  
   1    |     80      |    0.534393     |      -       |       -       |    22.52  
   1    |    120      |    0.500714     |      -       |       -       |    22.51  
   1    |    160      |    0.482292     |      -       |       -       |    22.50  
   1    |    200      |    0.466870     |      -       |       -       |    22.53  
   1    |    240      |    0.452701     |      -       |       -       |    22.51  
   1    |    280      |    0.463359     |      -       |       -       |    22.50  
   1    |    320      |    0.450762     |      -       |       -       |    22.41  
   1    |    360      |    0.447874     |      -       |       -       |    22.44  
   1    |    400      |    0.416015     |      -       |      



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.758658     |      -       |       -       |    23.01  
   1    |     80      |    0.556596     |      -       |       -       |    22.47  
   1    |    120      |    0.524766     |      -       |       -       |    22.51  
   1    |    160      |    0.529486     |      -       |       -       |    22.53  
   1    |    200      |    0.494536     |      -       |       -       |    22.55  
   1    |    240      |    0.489981     |      -       |       -       |    22.52  
   1    |    280      |    0.464526     |      -       |       -       |    22.54  
   1    |    320      |    0.483669     |      -       |       -       |    22.55  
   1    |    360      |    0.440075     |      -       |       -       |    22.55  
   1    |    400      |    0.470382     |      -       |      



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.639286     |      -       |       -       |    23.05  
   1    |     80      |    0.527724     |      -       |       -       |    22.47  
   1    |    120      |    0.475760     |      -       |       -       |    22.49  
   1    |    160      |    0.462662     |      -       |       -       |    22.41  
   1    |    200      |    0.423240     |      -       |       -       |    22.43  
   1    |    240      |    0.459833     |      -       |       -       |    22.41  
   1    |    280      |    0.464850     |      -       |       -       |    22.42  
   1    |    320      |    0.444611     |      -       |       -       |    22.39  
   1    |    360      |    0.442556     |      -       |       -       |    22.43  
   1    |    400      |    0.479404     |      -       |      



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.587955     |      -       |       -       |    23.08  
   1    |     80      |    0.467648     |      -       |       -       |    22.46  
   1    |    120      |    0.469117     |      -       |       -       |    22.48  
   1    |    160      |    0.466238     |      -       |       -       |    22.49  
   1    |    200      |    0.466794     |      -       |       -       |    22.50  
   1    |    240      |    0.433212     |      -       |       -       |    22.48  
   1    |    280      |    0.434997     |      -       |       -       |    22.48  
   1    |    320      |    0.406889     |      -       |       -       |    22.48  
   1    |    360      |    0.427804     |      -       |       -       |    22.48  
   1    |    400      |    0.394259     |      -       |      



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.609928     |      -       |       -       |    22.92  
   1    |     80      |    0.509144     |      -       |       -       |    22.35  
   1    |    120      |    0.454158     |      -       |       -       |    22.45  
   1    |    160      |    0.433926     |      -       |       -       |    22.44  
   1    |    200      |    0.429785     |      -       |       -       |    22.45  
   1    |    240      |    0.428950     |      -       |       -       |    22.43  
   1    |    280      |    0.436057     |      -       |       -       |    22.45  
   1    |    320      |    0.445182     |      -       |       -       |    22.44  
   1    |    360      |    0.445690     |      -       |       -       |    22.44  
   1    |    400      |    0.435482     |      -       |      



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.662683     |      -       |       -       |    23.04  
   1    |     80      |    0.508844     |      -       |       -       |    22.44  
   1    |    120      |    0.485039     |      -       |       -       |    22.47  
   1    |    160      |    0.467920     |      -       |       -       |    22.49  
   1    |    200      |    0.477276     |      -       |       -       |    22.50  
   1    |    240      |    0.425056     |      -       |       -       |    22.48  
   1    |    280      |    0.451617     |      -       |       -       |    22.50  
   1    |    320      |    0.442283     |      -       |       -       |    22.48  
   1    |    360      |    0.464478     |      -       |       -       |    22.49  
   1    |    400      |    0.436154     |      -       |      



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.710480     |      -       |       -       |    22.95  
   1    |     80      |    0.523672     |      -       |       -       |    22.36  
   1    |    120      |    0.512060     |      -       |       -       |    22.41  
   1    |    160      |    0.487643     |      -       |       -       |    22.39  
   1    |    200      |    0.498710     |      -       |       -       |    22.41  
   1    |    240      |    0.495261     |      -       |       -       |    22.40  
   1    |    280      |    0.466751     |      -       |       -       |    22.40  
   1    |    320      |    0.453517     |      -       |       -       |    22.40  
   1    |    360      |    0.458510     |      -       |       -       |    22.40  
   1    |    400      |    0.430877     |      -       |      



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.640111     |      -       |       -       |    23.12  
   1    |     80      |    0.514722     |      -       |       -       |    22.47  
   1    |    120      |    0.466296     |      -       |       -       |    22.48  
   1    |    160      |    0.456130     |      -       |       -       |    22.49  
   1    |    200      |    0.437451     |      -       |       -       |    22.48  
   1    |    240      |    0.433100     |      -       |       -       |    22.48  
   1    |    280      |    0.458247     |      -       |       -       |    22.48  
   1    |    320      |    0.453778     |      -       |       -       |    22.49  
   1    |    360      |    0.424254     |      -       |       -       |    22.47  
   1    |    400      |    0.433248     |      -       |      



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.664448     |      -       |       -       |    22.90  
   1    |     80      |    0.529241     |      -       |       -       |    22.33  
   1    |    120      |    0.497512     |      -       |       -       |    22.41  
   1    |    160      |    0.503425     |      -       |       -       |    22.40  
   1    |    200      |    0.475058     |      -       |       -       |    22.42  
   1    |    240      |    0.457364     |      -       |       -       |    22.39  
   1    |    280      |    0.445208     |      -       |       -       |    22.41  
   1    |    320      |    0.453549     |      -       |       -       |    22.44  
   1    |    360      |    0.444662     |      -       |       -       |    22.48  
   1    |    400      |    0.449082     |      -       |      



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.577009     |      -       |       -       |    22.98  
   1    |     80      |    0.487865     |      -       |       -       |    22.43  
   1    |    120      |    0.456727     |      -       |       -       |    22.47  
   1    |    160      |    0.417091     |      -       |       -       |    22.48  
   1    |    200      |    0.428083     |      -       |       -       |    22.45  
   1    |    240      |    0.459267     |      -       |       -       |    22.47  
   1    |    280      |    0.441857     |      -       |       -       |    22.48  
   1    |    320      |    0.437000     |      -       |       -       |    22.46  
   1    |    360      |    0.431745     |      -       |       -       |    22.47  
   1    |    400      |    0.394015     |      -       |      



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.597438     |      -       |       -       |    23.04  
   1    |     80      |    0.506889     |      -       |       -       |    22.38  
   1    |    120      |    0.452959     |      -       |       -       |    22.39  
   1    |    160      |    0.451268     |      -       |       -       |    22.42  
   1    |    200      |    0.438484     |      -       |       -       |    22.39  
   1    |    240      |    0.449120     |      -       |       -       |    22.50  
   1    |    280      |    0.412986     |      -       |       -       |    22.48  
   1    |    320      |    0.404784     |      -       |       -       |    22.48  
   1    |    360      |    0.429432     |      -       |       -       |    22.47  
   1    |    400      |    0.407704     |      -       |      



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.607919     |      -       |       -       |    23.04  
   1    |     80      |    0.486764     |      -       |       -       |    22.47  
   1    |    120      |    0.462715     |      -       |       -       |    22.45  
   1    |    160      |    0.447104     |      -       |       -       |    22.47  
   1    |    200      |    0.459455     |      -       |       -       |    22.47  
   1    |    240      |    0.419710     |      -       |       -       |    22.49  
   1    |    280      |    0.447933     |      -       |       -       |    22.48  
   1    |    320      |    0.455430     |      -       |       -       |    22.47  
   1    |    360      |    0.443124     |      -       |       -       |    22.47  
   1    |    400      |    0.428611     |      -       |      



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.626828     |      -       |       -       |    22.96  
   1    |     80      |    0.506674     |      -       |       -       |    22.41  
   1    |    120      |    0.473698     |      -       |       -       |    22.45  
   1    |    160      |    0.413945     |      -       |       -       |    22.49  
   1    |    200      |    0.434765     |      -       |       -       |    22.47  
   1    |    240      |    0.461119     |      -       |       -       |    22.48  
   1    |    280      |    0.450844     |      -       |       -       |    22.49  
   1    |    320      |    0.418320     |      -       |       -       |    22.49  
   1    |    360      |    0.430808     |      -       |       -       |    22.49  
   1    |    400      |    0.432139     |      -       |      



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.540995     |      -       |       -       |    23.04  
   1    |     80      |    0.473383     |      -       |       -       |    22.47  
   1    |    120      |    0.420247     |      -       |       -       |    22.45  
   1    |    160      |    0.450231     |      -       |       -       |    22.48  
   1    |    200      |    0.427247     |      -       |       -       |    22.48  
   1    |    240      |    0.419184     |      -       |       -       |    22.48  
   1    |    280      |    0.437823     |      -       |       -       |    22.46  
   1    |    320      |    0.396427     |      -       |       -       |    22.45  
   1    |    360      |    0.430249     |      -       |       -       |    22.39  
   1    |    400      |    0.418456     |      -       |      



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.567646     |      -       |       -       |    23.04  
   1    |     80      |    0.511145     |      -       |       -       |    22.48  
   1    |    120      |    0.436924     |      -       |       -       |    22.49  
   1    |    160      |    0.444259     |      -       |       -       |    22.49  
   1    |    200      |    0.460287     |      -       |       -       |    22.49  
   1    |    240      |    0.419101     |      -       |       -       |    22.50  
   1    |    280      |    0.437347     |      -       |       -       |    22.49  
   1    |    320      |    0.416709     |      -       |       -       |    22.48  
   1    |    360      |    0.402766     |      -       |       -       |    22.47  
   1    |    400      |    0.426280     |      -       |      



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.634820     |      -       |       -       |    23.11  
   1    |     80      |    0.524523     |      -       |       -       |    22.47  
   1    |    120      |    0.465180     |      -       |       -       |    22.48  
   1    |    160      |    0.471817     |      -       |       -       |    22.50  
   1    |    200      |    0.427665     |      -       |       -       |    22.48  
   1    |    240      |    0.424914     |      -       |       -       |    22.45  
   1    |    280      |    0.445421     |      -       |       -       |    22.38  
   1    |    320      |    0.442785     |      -       |       -       |    22.41  
   1    |    360      |    0.426781     |      -       |       -       |    22.39  
   1    |    400      |    0.465521     |      -       |      



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.615663     |      -       |       -       |    22.97  
   1    |     80      |    0.502028     |      -       |       -       |    22.46  
   1    |    120      |    0.455080     |      -       |       -       |    22.48  
   1    |    160      |    0.448141     |      -       |       -       |    22.51  
   1    |    200      |    0.459779     |      -       |       -       |    22.49  
   1    |    240      |    0.425873     |      -       |       -       |    22.49  
   1    |    280      |    0.444166     |      -       |       -       |    22.49  
   1    |    320      |    0.443796     |      -       |       -       |    22.49  
   1    |    360      |    0.440245     |      -       |       -       |    22.50  
   1    |    400      |    0.405791     |      -       |      



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.674839     |      -       |       -       |    23.06  
   1    |     80      |    0.546720     |      -       |       -       |    22.47  
   1    |    120      |    0.483236     |      -       |       -       |    22.44  
   1    |    160      |    0.501677     |      -       |       -       |    22.38  
   1    |    200      |    0.470724     |      -       |       -       |    22.41  
   1    |    240      |    0.428440     |      -       |       -       |    22.40  
   1    |    280      |    0.457107     |      -       |       -       |    22.40  
   1    |    320      |    0.441733     |      -       |       -       |    22.38  
   1    |    360      |    0.436415     |      -       |       -       |    22.40  
   1    |    400      |    0.444811     |      -       |      



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.586869     |      -       |       -       |    23.03  
   1    |     80      |    0.482274     |      -       |       -       |    22.47  
   1    |    120      |    0.469711     |      -       |       -       |    22.48  
   1    |    160      |    0.447749     |      -       |       -       |    22.48  
   1    |    200      |    0.445327     |      -       |       -       |    22.48  
   1    |    240      |    0.462640     |      -       |       -       |    22.50  
   1    |    280      |    0.422462     |      -       |       -       |    22.50  
   1    |    320      |    0.428357     |      -       |       -       |    22.50  
   1    |    360      |    0.443832     |      -       |       -       |    22.50  
   1    |    400      |    0.401690     |      -       |      



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.645876     |      -       |       -       |    22.94  
   1    |     80      |    0.523341     |      -       |       -       |    22.35  
   1    |    120      |    0.486838     |      -       |       -       |    22.39  
   1    |    160      |    0.466281     |      -       |       -       |    22.39  
   1    |    200      |    0.453621     |      -       |       -       |    22.41  
   1    |    240      |    0.449647     |      -       |       -       |    22.39  
   1    |    280      |    0.435347     |      -       |       -       |    22.41  
   1    |    320      |    0.447933     |      -       |       -       |    22.38  
   1    |    360      |    0.462582     |      -       |       -       |    22.41  
   1    |    400      |    0.405392     |      -       |      



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.629845     |      -       |       -       |    23.10  
   1    |     80      |    0.549768     |      -       |       -       |    22.47  
   1    |    120      |    0.484949     |      -       |       -       |    22.48  
   1    |    160      |    0.488190     |      -       |       -       |    22.47  
   1    |    200      |    0.452461     |      -       |       -       |    22.48  
   1    |    240      |    0.442413     |      -       |       -       |    22.47  
   1    |    280      |    0.453205     |      -       |       -       |    22.50  
   1    |    320      |    0.448230     |      -       |       -       |    22.46  
   1    |    360      |    0.417558     |      -       |       -       |    22.49  
   1    |    400      |    0.426517     |      -       |      



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.598059     |      -       |       -       |    22.95  
   1    |     80      |    0.501978     |      -       |       -       |    22.33  
   1    |    120      |    0.477044     |      -       |       -       |    22.39  
   1    |    160      |    0.451626     |      -       |       -       |    22.38  
   1    |    200      |    0.425932     |      -       |       -       |    22.42  
   1    |    240      |    0.485125     |      -       |       -       |    22.41  
   1    |    280      |    0.445343     |      -       |       -       |    22.42  
   1    |    320      |    0.453160     |      -       |       -       |    22.40  
   1    |    360      |    0.429774     |      -       |       -       |    22.41  
   1    |    400      |    0.443824     |      -       |      



Start training and testing:

 Epoch  | 每40个Batch |   训练集 Loss   |  测试集 Loss  |  测试集准确率   |    时间    
--------------------------------------------------------------------------------
   1    |     40      |    0.713471     |      -       |       -       |    24.77  
   1    |     80      |    0.523151     |      -       |       -       |    24.06  
   1    |    120      |    0.469240     |      -       |       -       |    24.11  
   1    |    160      |    0.505017     |      -       |       -       |    24.04  


KeyboardInterrupt: 