## Import Packages

In [2]:
!pip install transformers==4.26.0
!pip install accelerate==0.16.0

Collecting accelerate==0.16.0
  Using cached accelerate-0.16.0-py3-none-any.whl (199 kB)
Installing collected packages: accelerate
  Attempting uninstall: accelerate
    Found existing installation: accelerate 0.2.0
    Uninstalling accelerate-0.2.0:
      Successfully uninstalled accelerate-0.2.0
Successfully installed accelerate-0.16.0


In [1]:
import math
import json
import numpy as np
import random
import torch
from torch.utils.data import DataLoader, Dataset 
from transformers import AdamW, BertForQuestionAnswering, BertTokenizerFast
from transformers import AutoModelForQuestionAnswering, AutoTokenizer
from transformers import get_linear_schedule_with_warmup

from tqdm.auto import tqdm

device = torch.device("cuda", 1) if torch.cuda.is_available() else "cpu"

# Fix random seed for reproducibility
def same_seeds(seed):
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
same_seeds(7414)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Change "fp16_training" to True to support automatic mixed precision training (fp16)
# Change "fp16_training" to True to support automatic mixed precision training (fp16)
fp16_training = True
fp16 = True
!pip install accelerate==0.2.0
from accelerate import Accelerator
accelerator = Accelerator(fp16)
device = accelerator.device

# Documentation for the toolkit:  https://huggingface.co/docs/accelerate/

# Documentation for the toolkit:  https://huggingface.co/docs/accelerate/



## Load Model and Tokenizer




 

In [4]:
model = AutoModelForQuestionAnswering.from_pretrained("luhua/chinese_pretrain_mrc_macbert_large").to(device)
tokenizer = AutoTokenizer.from_pretrained("luhua/chinese_pretrain_mrc_macbert_large")

# You can safely ignore the warning message (it pops up because new prediction heads for QA are initialized randomly)

## Read Data


In [5]:
def read_data(file):
    with open(file, 'r', encoding="utf-8") as reader:
        data = json.load(reader)
    return data["questions"], data["paragraphs"]

train_questions, train_paragraphs = read_data("hw7_train.json")
dev_questions, dev_paragraphs = read_data("hw7_dev.json")
test_questions, test_paragraphs = read_data("hw7_test.json")

## Tokenize Data

In [6]:
# Tokenize questions and paragraphs separately
# 「add_special_tokens」 is set to False since special tokens will be added when tokenized questions and paragraphs are combined in datset __getitem__ 

train_questions_tokenized = tokenizer([train_question["question_text"] for train_question in train_questions], add_special_tokens=False)
dev_questions_tokenized = tokenizer([dev_question["question_text"] for dev_question in dev_questions], add_special_tokens=False)
test_questions_tokenized = tokenizer([test_question["question_text"] for test_question in test_questions], add_special_tokens=False) 

train_paragraphs_tokenized = tokenizer(train_paragraphs, add_special_tokens=False)
dev_paragraphs_tokenized = tokenizer(dev_paragraphs, add_special_tokens=False)
test_paragraphs_tokenized = tokenizer(test_paragraphs, add_special_tokens=False)

# You can safely ignore the warning message as tokenized sequences will be futher processed in datset __getitem__ before passing to model

## Dataset

In [7]:
DOC_STRIDE = None


In [8]:
class QA_Dataset(Dataset):
    def __init__(self, split, questions, tokenized_questions, tokenized_paragraphs):
        self.split = split
        self.questions = questions
        self.tokenized_questions = tokenized_questions
        self.tokenized_paragraphs = tokenized_paragraphs
        self.max_question_len = 40
        self.max_paragraph_len = 350
        
        ##### TODO: Change value of doc_stride #####
        self.doc_stride = int(0.5 * self.max_paragraph_len)
        ############################################
        global DOC_STRIDE
        DOC_STRIDE = self.doc_stride
        ############################################
        # Input sequence length = [CLS] + question + [SEP] + paragraph + [SEP]
        self.max_seq_len = 1 + self.max_question_len + 1 + self.max_paragraph_len + 1

    def __len__(self):
        return len(self.questions)

    def __getitem__(self, idx):
        question = self.questions[idx]
        tokenized_question = self.tokenized_questions[idx]
        tokenized_paragraph = self.tokenized_paragraphs[question["paragraph_id"]]

        ##### TODO: Preprocessing #####
        # Hint: How to prevent model from learning something it should not learn

        if self.split == "train":
            # Convert answer's start/end positions in paragraph_text to start/end positions in tokenized_paragraph  
            answer_start_token = tokenized_paragraph.char_to_token(question["answer_start"])
            answer_end_token = tokenized_paragraph.char_to_token(question["answer_end"])

#             # A single window is obtained by slicing the portion of paragraph containing the answer
#             mid = (answer_start_token + answer_end_token) // 2
#             paragraph_start = max(0, min(mid - self.max_paragraph_len // 2, len(tokenized_paragraph) - self.max_paragraph_len))
#             paragraph_end = paragraph_start + self.max_paragraph_len
            
            # A single window is obtained by slicing the portion of paragraph containing the answer
            mid = (answer_start_token + answer_end_token) // 2
            prefix_len = int(random.random() * self.max_paragraph_len)
            postfix_len = self.max_paragraph_len - prefix_len
            paragraph_start, paragraph_end = mid - prefix_len, mid + postfix_len
            if paragraph_start < 0:
                paragraph_end -= paragraph_start
                paragraph_start = 0
            if paragraph_end >= len(tokenized_paragraph):
                paragraph_end = len(tokenized_paragraph) - 1
            
            # Slice question/paragraph and add special tokens (101: CLS, 102: SEP)
            input_ids_question = [101] + tokenized_question.ids[:self.max_question_len] + [102] 
            input_ids_paragraph = tokenized_paragraph.ids[paragraph_start : paragraph_end] + [102]
            
            # Convert answer's start/end positions in tokenized_paragraph to start/end positions in the window  
            answer_start_token += len(input_ids_question) - paragraph_start
            answer_end_token += len(input_ids_question) - paragraph_start
            
            # Pad sequence and obtain inputs to model 
            input_ids, token_type_ids, attention_mask = self.padding(input_ids_question, input_ids_paragraph)
            return torch.tensor(input_ids), torch.tensor(token_type_ids), torch.tensor(attention_mask), answer_start_token, answer_end_token

        # Validation/Testing
        else:
            input_ids_list, token_type_ids_list, attention_mask_list = [], [], []
            
            # Paragraph is split into several windows, each with start positions separated by step "doc_stride"
            for i in range(0, len(tokenized_paragraph), self.doc_stride):
                
                # Slice question/paragraph and add special tokens (101: CLS, 102: SEP)
                input_ids_question = [101] + tokenized_question.ids[:self.max_question_len] + [102]
                input_ids_paragraph = tokenized_paragraph.ids[i : i + self.max_paragraph_len] + [102]
                
                # Pad sequence and obtain inputs to model
                input_ids, token_type_ids, attention_mask = self.padding(input_ids_question, input_ids_paragraph)
                
                input_ids_list.append(input_ids)
                token_type_ids_list.append(token_type_ids)
                attention_mask_list.append(attention_mask)
            
            return torch.tensor(input_ids_list), torch.tensor(token_type_ids_list), torch.tensor(attention_mask_list)

    def padding(self, input_ids_question, input_ids_paragraph):
        # Pad zeros if sequence length is shorter than max_seq_len
        padding_len = self.max_seq_len - len(input_ids_question) - len(input_ids_paragraph)
        # Indices of input sequence tokens in the vocabulary
        input_ids = input_ids_question + input_ids_paragraph + [0] * padding_len
        # Segment token indices to indicate first and second portions of the inputs. Indices are selected in [0, 1]
        token_type_ids = [0] * len(input_ids_question) + [1] * len(input_ids_paragraph) + [0] * padding_len
        # Mask to avoid performing attention on padding token indices. Mask values selected in [0, 1]
        attention_mask = [1] * (len(input_ids_question) + len(input_ids_paragraph)) + [0] * padding_len
        
        return input_ids, token_type_ids, attention_mask

train_set = QA_Dataset("train", train_questions, train_questions_tokenized, train_paragraphs_tokenized)
dev_set = QA_Dataset("dev", dev_questions, dev_questions_tokenized, dev_paragraphs_tokenized)
test_set = QA_Dataset("test", test_questions, test_questions_tokenized, test_paragraphs_tokenized)

train_batch_size = 4

# Note: Do NOT change batch size of dev_loader / test_loader !
# Although batch size=1, it is actually a batch consisting of several windows from the same QA pair
train_loader = DataLoader(train_set, batch_size=train_batch_size, shuffle=True, pin_memory=True)
dev_loader = DataLoader(dev_set, batch_size=1, shuffle=False, pin_memory=True)
test_loader = DataLoader(test_set, batch_size=1, shuffle=False, pin_memory=True)

## Function for Evaluation

In [9]:
def evaluate(data, output, paragraph, paragraph_tokenized):
    ##### TODO: Postprocessing #####
    # There is a bug and room for improvement in postprocessing 
    # Hint: Open your prediction file to see what is wrong 
    
    answer = ''
    max_prob = float('-inf')
    num_of_windows = data[0].shape[1]
    
    paragraph_start_index = 0
    paragraph_end_index = 0
    
    for k in range(num_of_windows):
        # Obtain answer by choosing the most probable start position / end position
#         start_prob, start_index = torch.max(output.start_logits[k], dim=0)
#         end_prob, end_index = torch.max(output.end_logits[k], dim=0)
        
#         # Probability of answer is calculated as sum of start_prob and end_prob
#         prob = start_prob + end_prob

        mask = (data[1][0][k].bool() & data[2][0][k].bool()).to(device)
    
        masked_output_start = torch.masked_select(output.start_logits[k], mask)
        masked_output_start = masked_output_start[:-1]
        
        start_prob, start_index = torch.max(masked_output_start, dim=0)
        
        masked_output_end = torch.masked_select(output.end_logits[k], mask)
        masked_output_end = masked_output_end[start_index: -1]
        
        end_prob, end_index = torch.max(masked_output_end, dim=0)
        
        end_index += start_index
        
        # Probability of answer is calculated as sum of start_prob and end_prob
        prob = start_prob + end_prob
        masked_data = torch.masked_select(data[0][0][k].to(device), mask)[:-1]
        
        # Replace answer if calculated probability is larger than previous windows
        if (prob > max_prob) and (start_index <= end_index <= (start_index + 50)):
            max_prob = prob
            paragraph_start_index = start_index.item() + (DOC_STRIDE * k)
            paragraph_end_index = end_index.item() + (DOC_STRIDE * k)
            # Convert tokens to chars (e.g. [1920, 7032] --> "大 金")
#             answer = tokenizer.decode(data[0][0][k][start_index : end_index + 1])
            answer = tokenizer.decode(masked_data[start_index : end_index + 1])
            
#     # 轉換 [UNK]        
#     if "[UNK]" in answer:
#         # 原始答案 #
#         print(f"原始答案: {answer}")
#         ##########
#         char_count = 0
#         start_flag = False

#         for i, token in enumerate(paragraph_tokenized):
#             if token in ('[UNK]', '[CLS]', '[SEP]'):
#                 if i == paragraph_start_index:
#                     new_start = char_count
#                 if i == paragraph_end_index:
#                     new_end = char_count
#                 char_count += 1
#             else:
#                 for char in token:
#                     if i == paragraph_start_index and not start_flag:
#                         new_start = char_count
#                         start_flag = True
#                     if i == paragraph_end_index:
#                         new_end = char_count
#                     if char == "#":
#                         continue
#                     else:
#                         while char_count < len(paragraph) and char != paragraph[char_count]:
#                             char_count += 1
#                         char_count += 1
            
#         answer = paragraph[new_start: new_end+1]
#         print(f"修正後答案: {answer}")
#         print(paragraph_start_index, paragraph_end_index)
#         print(new_start, new_end)
#         print("-"*50)

    
    ##########
    char_count = 0
    start_flag = False

    for i, token in enumerate(paragraph_tokenized):
        if token in ('[UNK]', '[CLS]', '[SEP]'):
            if i == paragraph_start_index:
                new_start = char_count
            if i == paragraph_end_index:
                new_end = char_count
            char_count += 1
        else:
            for char in token:
                if i == paragraph_start_index and not start_flag:
                    new_start = char_count
                    start_flag = True
                if i == paragraph_end_index:
                    new_end = char_count
                if char == "#":
                    continue
                else:
                    while char_count < len(paragraph) and char != paragraph[char_count]:
                        char_count += 1
                    char_count += 1
    # 後處理
    # 1.轉換 [UNK]        
    if "[UNK]" in answer:
        # 原始答案 #
        print(f"原始答案: {answer}")
        # 修正後答案 #
        answer = paragraph[new_start: new_end+1]
        print(f"修正後答案: {answer}")
        print("-"*50)

###########################################################
    
#     # 2.把沒抓到的引號補回來
#     # 尾巴引號漏掉
#     if (new_end+1) < len(paragraph):
#         if paragraph[new_start] == "「" and paragraph[new_end+1] == "」":
#             answer = paragraph[new_start: new_end+2]
#     # 頭尾引號都漏掉
#     if new_start > 0 and (new_end+1) < len(paragraph):
#         if paragraph[new_start-1] == "「" and paragraph[new_end+1] == "」":
#             answer = paragraph[new_start-1: new_end+2]
#     # 尾巴隔一個標點符號才接引號
#     if (new_end+2) < len(paragraph):
#         if paragraph[new_start] == "「" and paragraph[new_end+2] == "」":
#             answer = paragraph[new_start: new_end+3]
#     if new_start > 0 and (new_end+2) < len(paragraph):
#         if paragraph[new_start-1] == "「" and paragraph[new_end+2] == "」":
#             answer = paragraph[new_start-1: new_end+3]
    
#     # 3.把「為了」與「因為」抓回來
#     if new_start > 1:
#         if paragraph[new_start-2: new_start] == "為了":
#             answer = paragraph[new_start-2: new_end+1]
#         if paragraph[new_start-2: new_start] == "因為":
#             answer = paragraph[new_start-2: new_end+1]

    
#     if new_start > 0:
#         if paragraph[new_start-1] == "因":
#             answer = paragraph[new_start-1: new_end+1]

###########################################################
        
    # 最後移除空格 (e.g. "大 金" --> "大金")
    answer = answer.replace(' ', '')
    
###########################################################

#     # 把「為了」與「因為」拿掉
#     if len(answer) > 2:
#         if answer[:2] == "為了" or answer[:2] == "因為":
#             answer = answer[2:]
    
#     # 把「」拿掉
#     if len(answer) > 2:
#         if answer[0] == "「" and answer[-1] == "」":
#             answer = answer[1:-1]
    
###########################################################

    if len(answer) > 1:
        if "「" not in answer and answer[-1] == "」":
            answer = answer[:-1]
#     if 3 > len(answer) > 1:
#         if answer[0] == "第":
#             i = 1
#             while i < len(answer) and answer[i] in list("123456789一二三四五六七八九"):
#                 i += 1
#             answer = answer[1:i]
    
    return answer

## Training

In [10]:
num_epoch = 5  # 3
validation = True  # True
logging_step = 100
learning_rate = 5e-6
# accum_iter = 8

optimizer = AdamW(model.parameters(), lr=learning_rate)

##### TODO: Apply linear learning rate decay #####
total_steps = len(train_loader) * num_epoch
# warmup_steps = int(0.05 * total_steps)

scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=100, num_training_steps=total_steps)
##################################################

if fp16_training:
    model, optimizer, train_loader = accelerator.prepare(model, optimizer, train_loader) 

model.train()

print("Start Training ...")

for epoch in range(num_epoch):
    step = 1
    train_loss = train_acc = 0
    
    for batch_idx, data in enumerate(tqdm(train_loader)):
        # Load all data into GPU
        data = [i.to(device) for i in data]
        
        # Model inputs: input_ids, token_type_ids, attention_mask, start_positions, end_positions (Note: only "input_ids" is mandatory)
        # Model outputs: start_logits, end_logits, loss (return when start_positions/end_positions are provided)  
        output = model(input_ids=data[0], token_type_ids=data[1], attention_mask=data[2], start_positions=data[3], end_positions=data[4])

        # Choose the most probable start position / end position
        start_index = torch.argmax(output.start_logits, dim=1)
        end_index = torch.argmax(output.end_logits, dim=1)

        # Prediction is correct only if both start_index and end_index are correct
        train_acc += ((start_index == data[3]) & (end_index == data[4])).float().mean()
        train_loss += output.loss

        if fp16_training:
            accelerator.backward(output.loss)
        else:
            output.loss.backward()

        ##### TODO: Apply linear learning rate decay #####
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()
        step += 1
        ##################################################
        
        # Print training loss and accuracy over past logging step
        if step % logging_step == 0:
            print(f"Epoch {epoch + 1} | Step {step} | loss = {train_loss.item() / logging_step:.3f}, acc = {train_acc / logging_step:.3f}")
            train_loss = train_acc = 0
            
    if validation:
        print("Evaluating Dev Set ...")
        model.eval()
        with torch.no_grad():
            dev_acc = 0
            for i, data in enumerate(tqdm(dev_loader)):
                output = model(input_ids=data[0].squeeze(dim=0).to(device), token_type_ids=data[1].squeeze(dim=0).to(device),
                       attention_mask=data[2].squeeze(dim=0).to(device))
                # prediction is correct only if answer text exactly matches
                dev_acc += evaluate(data, output, dev_paragraphs[dev_questions[i]['paragraph_id']], dev_paragraphs_tokenized[dev_questions[i]['paragraph_id']].tokens) == dev_questions[i]["answer_text"]
            print(f"Validation | Epoch {epoch + 1} | acc = {dev_acc / len(dev_loader):.3f}")
        model.train()

# Save a model and its configuration file to the directory 「saved_model」 
# i.e. there are two files under the direcory 「saved_model」: 「pytorch_model.bin」 and 「config.json」
# Saved model can be re-loaded using 「model = BertForQuestionAnswering.from_pretrained("saved_model")」
print("Saving Model ...")
model_save_dir = "testmodel5" 
model.save_pretrained(model_save_dir)



Start Training ...


  1%|▏         | 99/6730 [01:15<1:35:18,  1.16it/s]

Epoch 1 | Step 100 | loss = 1.729, acc = 0.532


  3%|▎         | 199/6730 [02:30<1:31:52,  1.18it/s]

Epoch 1 | Step 200 | loss = 0.889, acc = 0.690


  4%|▍         | 299/6730 [03:43<1:30:42,  1.18it/s]

Epoch 1 | Step 300 | loss = 0.771, acc = 0.705


  6%|▌         | 399/6730 [04:56<1:28:56,  1.19it/s]

Epoch 1 | Step 400 | loss = 0.812, acc = 0.700


  7%|▋         | 499/6730 [06:06<1:21:55,  1.27it/s]

Epoch 1 | Step 500 | loss = 0.844, acc = 0.705


  9%|▉         | 599/6730 [07:14<1:20:44,  1.27it/s]

Epoch 1 | Step 600 | loss = 0.796, acc = 0.683


 10%|█         | 699/6730 [08:21<1:19:21,  1.27it/s]

Epoch 1 | Step 700 | loss = 0.675, acc = 0.748


 12%|█▏        | 799/6730 [09:29<1:18:04,  1.27it/s]

Epoch 1 | Step 800 | loss = 0.784, acc = 0.702


 13%|█▎        | 899/6730 [10:37<1:16:42,  1.27it/s]

Epoch 1 | Step 900 | loss = 0.779, acc = 0.745


 15%|█▍        | 999/6730 [11:44<1:15:25,  1.27it/s]

Epoch 1 | Step 1000 | loss = 0.619, acc = 0.767


 16%|█▋        | 1099/6730 [12:52<1:14:03,  1.27it/s]

Epoch 1 | Step 1100 | loss = 0.675, acc = 0.750


 18%|█▊        | 1199/6730 [14:00<1:12:38,  1.27it/s]

Epoch 1 | Step 1200 | loss = 0.727, acc = 0.712


 19%|█▉        | 1299/6730 [15:07<1:11:26,  1.27it/s]

Epoch 1 | Step 1300 | loss = 0.616, acc = 0.787


 21%|██        | 1399/6730 [16:15<1:09:44,  1.27it/s]

Epoch 1 | Step 1400 | loss = 0.594, acc = 0.765


 22%|██▏       | 1499/6730 [17:22<1:09:31,  1.25it/s]

Epoch 1 | Step 1500 | loss = 0.776, acc = 0.732


 24%|██▍       | 1599/6730 [18:31<1:06:58,  1.28it/s]

Epoch 1 | Step 1600 | loss = 0.724, acc = 0.745


 25%|██▌       | 1699/6730 [19:40<1:05:50,  1.27it/s]

Epoch 1 | Step 1700 | loss = 0.596, acc = 0.767


 27%|██▋       | 1799/6730 [20:47<1:04:36,  1.27it/s]

Epoch 1 | Step 1800 | loss = 0.564, acc = 0.748


 28%|██▊       | 1899/6730 [21:55<1:03:28,  1.27it/s]

Epoch 1 | Step 1900 | loss = 0.629, acc = 0.750


 30%|██▉       | 1999/6730 [23:03<1:02:02,  1.27it/s]

Epoch 1 | Step 2000 | loss = 0.528, acc = 0.785


 31%|███       | 2099/6730 [24:10<1:00:51,  1.27it/s]

Epoch 1 | Step 2100 | loss = 0.679, acc = 0.750


 33%|███▎      | 2199/6730 [25:18<59:29,  1.27it/s]  

Epoch 1 | Step 2200 | loss = 0.776, acc = 0.697


 34%|███▍      | 2299/6730 [26:26<58:15,  1.27it/s]

Epoch 1 | Step 2300 | loss = 0.622, acc = 0.743


 36%|███▌      | 2399/6730 [27:33<57:00,  1.27it/s]

Epoch 1 | Step 2400 | loss = 0.573, acc = 0.765


 37%|███▋      | 2499/6730 [28:40<53:57,  1.31it/s]

Epoch 1 | Step 2500 | loss = 0.727, acc = 0.745


 39%|███▊      | 2599/6730 [29:48<54:16,  1.27it/s]

Epoch 1 | Step 2600 | loss = 0.597, acc = 0.775


 40%|████      | 2699/6730 [30:55<52:53,  1.27it/s]

Epoch 1 | Step 2700 | loss = 0.639, acc = 0.762


 42%|████▏     | 2799/6730 [32:04<51:48,  1.26it/s]

Epoch 1 | Step 2800 | loss = 0.695, acc = 0.775


 43%|████▎     | 2899/6730 [33:13<51:41,  1.24it/s]

Epoch 1 | Step 2900 | loss = 0.613, acc = 0.777


 45%|████▍     | 2999/6730 [34:21<51:10,  1.22it/s]

Epoch 1 | Step 3000 | loss = 0.587, acc = 0.772


 46%|████▌     | 3099/6730 [35:32<49:28,  1.22it/s]

Epoch 1 | Step 3100 | loss = 0.580, acc = 0.785


 48%|████▊     | 3199/6730 [36:42<48:10,  1.22it/s]

Epoch 1 | Step 3200 | loss = 0.621, acc = 0.735


 49%|████▉     | 3299/6730 [37:51<45:01,  1.27it/s]

Epoch 1 | Step 3300 | loss = 0.544, acc = 0.790


 51%|█████     | 3399/6730 [39:00<45:24,  1.22it/s]

Epoch 1 | Step 3400 | loss = 0.645, acc = 0.790


 52%|█████▏    | 3499/6730 [40:07<41:31,  1.30it/s]

Epoch 1 | Step 3500 | loss = 0.680, acc = 0.738


 53%|█████▎    | 3599/6730 [41:12<39:59,  1.30it/s]

Epoch 1 | Step 3600 | loss = 0.549, acc = 0.752


 55%|█████▍    | 3699/6730 [42:18<38:45,  1.30it/s]

Epoch 1 | Step 3700 | loss = 0.638, acc = 0.752


 56%|█████▋    | 3799/6730 [43:24<36:58,  1.32it/s]

Epoch 1 | Step 3800 | loss = 0.673, acc = 0.732


 58%|█████▊    | 3899/6730 [44:29<35:48,  1.32it/s]

Epoch 1 | Step 3900 | loss = 0.754, acc = 0.750


 59%|█████▉    | 3999/6730 [45:36<34:28,  1.32it/s]

Epoch 1 | Step 4000 | loss = 0.629, acc = 0.765


 61%|██████    | 4099/6730 [46:41<33:10,  1.32it/s]

Epoch 1 | Step 4100 | loss = 0.635, acc = 0.752


 62%|██████▏   | 4199/6730 [47:54<36:57,  1.14it/s]

Epoch 1 | Step 4200 | loss = 0.532, acc = 0.772


 64%|██████▍   | 4299/6730 [49:08<33:44,  1.20it/s]

Epoch 1 | Step 4300 | loss = 0.669, acc = 0.752


 65%|██████▌   | 4399/6730 [50:22<34:11,  1.14it/s]

Epoch 1 | Step 4400 | loss = 0.691, acc = 0.743


 67%|██████▋   | 4499/6730 [51:35<31:30,  1.18it/s]

Epoch 1 | Step 4500 | loss = 0.623, acc = 0.755


 68%|██████▊   | 4599/6730 [52:46<27:36,  1.29it/s]

Epoch 1 | Step 4600 | loss = 0.712, acc = 0.738


 70%|██████▉   | 4699/6730 [53:53<25:59,  1.30it/s]

Epoch 1 | Step 4700 | loss = 0.652, acc = 0.745


 71%|███████▏  | 4799/6730 [54:58<24:53,  1.29it/s]

Epoch 1 | Step 4800 | loss = 0.640, acc = 0.750


 73%|███████▎  | 4899/6730 [56:04<23:13,  1.31it/s]

Epoch 1 | Step 4900 | loss = 0.557, acc = 0.745


 74%|███████▍  | 4999/6730 [57:10<22:08,  1.30it/s]

Epoch 1 | Step 5000 | loss = 0.563, acc = 0.777


 76%|███████▌  | 5099/6730 [58:16<21:24,  1.27it/s]

Epoch 1 | Step 5100 | loss = 0.654, acc = 0.738


 77%|███████▋  | 5199/6730 [59:23<19:47,  1.29it/s]

Epoch 1 | Step 5200 | loss = 0.619, acc = 0.740


 79%|███████▊  | 5299/6730 [1:00:28<18:10,  1.31it/s]

Epoch 1 | Step 5300 | loss = 0.560, acc = 0.785


 80%|████████  | 5399/6730 [1:01:35<17:01,  1.30it/s]

Epoch 1 | Step 5400 | loss = 0.582, acc = 0.777


 82%|████████▏ | 5499/6730 [1:02:44<16:39,  1.23it/s]

Epoch 1 | Step 5500 | loss = 0.753, acc = 0.738


 83%|████████▎ | 5599/6730 [1:03:56<16:04,  1.17it/s]

Epoch 1 | Step 5600 | loss = 0.489, acc = 0.762


 85%|████████▍ | 5699/6730 [1:05:09<14:35,  1.18it/s]

Epoch 1 | Step 5700 | loss = 0.679, acc = 0.743


 86%|████████▌ | 5799/6730 [1:06:23<13:17,  1.17it/s]

Epoch 1 | Step 5800 | loss = 0.572, acc = 0.752


 88%|████████▊ | 5899/6730 [1:07:37<11:17,  1.23it/s]

Epoch 1 | Step 5900 | loss = 0.516, acc = 0.787


 89%|████████▉ | 5999/6730 [1:08:48<10:07,  1.20it/s]

Epoch 1 | Step 6000 | loss = 0.631, acc = 0.738


 91%|█████████ | 6099/6730 [1:09:58<08:28,  1.24it/s]

Epoch 1 | Step 6100 | loss = 0.583, acc = 0.800


 92%|█████████▏| 6199/6730 [1:11:10<07:17,  1.21it/s]

Epoch 1 | Step 6200 | loss = 0.593, acc = 0.765


 94%|█████████▎| 6299/6730 [1:12:16<05:30,  1.30it/s]

Epoch 1 | Step 6300 | loss = 0.601, acc = 0.745


 95%|█████████▌| 6399/6730 [1:13:22<04:15,  1.30it/s]

Epoch 1 | Step 6400 | loss = 0.582, acc = 0.787


 97%|█████████▋| 6499/6730 [1:14:28<03:00,  1.28it/s]

Epoch 1 | Step 6500 | loss = 0.657, acc = 0.782


 98%|█████████▊| 6599/6730 [1:15:34<01:39,  1.32it/s]

Epoch 1 | Step 6600 | loss = 0.632, acc = 0.750


100%|█████████▉| 6699/6730 [1:16:45<00:25,  1.21it/s]

Epoch 1 | Step 6700 | loss = 0.679, acc = 0.722


100%|██████████| 6730/6730 [1:17:05<00:00,  1.45it/s]


Evaluating Dev Set ...


 10%|▉         | 280/2863 [00:41<06:44,  6.38it/s]

原始答案: [UNK]
修正後答案: 鍶
--------------------------------------------------


 18%|█▊        | 506/2863 [01:15<06:28,  6.06it/s]

原始答案: [UNK]
修正後答案: A
--------------------------------------------------


 31%|███       | 879/2863 [02:11<05:56,  5.57it/s]

原始答案: [UNK]
修正後答案: U
--------------------------------------------------


 33%|███▎      | 952/2863 [02:22<04:47,  6.65it/s]

原始答案: [UNK] 以 下 的 聲 音 對 基 底 膜 的 影 響
修正後答案: 500Hz以下的聲音對基底膜的影響
--------------------------------------------------


 47%|████▋     | 1355/2863 [03:22<03:29,  7.18it/s]

原始答案: 1. [UNK]
修正後答案: 1.5
--------------------------------------------------


 55%|█████▍    | 1573/2863 [03:54<03:44,  5.75it/s]

原始答案: 常 [UNK]
修正後答案: 常璩
--------------------------------------------------


 62%|██████▏   | 1771/2863 [04:24<02:36,  6.96it/s]

原始答案: [UNK]
修正後答案: Z
--------------------------------------------------


 67%|██████▋   | 1919/2863 [04:46<02:08,  7.35it/s]

原始答案: [UNK] 東 海
修正後答案: JR東海
--------------------------------------------------


 72%|███████▏  | 2056/2863 [05:06<02:14,  6.01it/s]

原始答案: [UNK]
修正後答案: C
--------------------------------------------------


 72%|███████▏  | 2067/2863 [05:08<01:53,  6.99it/s]

原始答案: [UNK]
修正後答案: 1
--------------------------------------------------


 77%|███████▋  | 2205/2863 [05:28<01:58,  5.56it/s]

原始答案: 外 交 政 策 ， 歷 史 學 者 [UNK] [UNK]
修正後答案: 外交政策，歷史學者 D
--------------------------------------------------


 86%|████████▌ | 2448/2863 [06:03<00:53,  7.71it/s]

原始答案: [UNK]
修正後答案: 鉰
--------------------------------------------------


 88%|████████▊ | 2514/2863 [06:12<00:48,  7.18it/s]

原始答案: [UNK]
修正後答案: 1
--------------------------------------------------


100%|██████████| 2863/2863 [07:05<00:00,  6.73it/s]


Validation | Epoch 1 | acc = 0.862


  1%|▏         | 99/6730 [01:05<1:24:07,  1.31it/s]

Epoch 2 | Step 100 | loss = 0.450, acc = 0.815


  3%|▎         | 199/6730 [02:12<1:22:52,  1.31it/s]

Epoch 2 | Step 200 | loss = 0.337, acc = 0.842


  4%|▍         | 299/6730 [03:19<1:23:02,  1.29it/s]

Epoch 2 | Step 300 | loss = 0.421, acc = 0.810


  6%|▌         | 399/6730 [04:29<1:27:16,  1.21it/s]

Epoch 2 | Step 400 | loss = 0.362, acc = 0.850


  7%|▋         | 499/6730 [05:38<1:23:32,  1.24it/s]

Epoch 2 | Step 500 | loss = 0.419, acc = 0.832


  9%|▉         | 599/6730 [06:51<1:26:32,  1.18it/s]

Epoch 2 | Step 600 | loss = 0.377, acc = 0.830


 10%|█         | 699/6730 [08:03<1:25:00,  1.18it/s]

Epoch 2 | Step 700 | loss = 0.432, acc = 0.822


 12%|█▏        | 799/6730 [09:16<1:23:31,  1.18it/s]

Epoch 2 | Step 800 | loss = 0.455, acc = 0.787


 13%|█▎        | 899/6730 [10:29<1:22:18,  1.18it/s]

Epoch 2 | Step 900 | loss = 0.321, acc = 0.840


 15%|█▍        | 999/6730 [11:41<1:21:58,  1.17it/s]

Epoch 2 | Step 1000 | loss = 0.354, acc = 0.857


 16%|█▋        | 1099/6730 [12:55<1:20:44,  1.16it/s]

Epoch 2 | Step 1100 | loss = 0.339, acc = 0.842


 18%|█▊        | 1199/6730 [14:09<1:18:22,  1.18it/s]

Epoch 2 | Step 1200 | loss = 0.362, acc = 0.812


 19%|█▉        | 1299/6730 [15:22<1:16:37,  1.18it/s]

Epoch 2 | Step 1300 | loss = 0.452, acc = 0.825


 21%|██        | 1399/6730 [16:34<1:15:30,  1.18it/s]

Epoch 2 | Step 1400 | loss = 0.318, acc = 0.847


 22%|██▏       | 1499/6730 [17:47<1:13:54,  1.18it/s]

Epoch 2 | Step 1500 | loss = 0.350, acc = 0.837


 24%|██▍       | 1599/6730 [19:00<1:12:53,  1.17it/s]

Epoch 2 | Step 1600 | loss = 0.371, acc = 0.825


 25%|██▌       | 1699/6730 [20:13<1:10:59,  1.18it/s]

Epoch 2 | Step 1700 | loss = 0.396, acc = 0.842


 27%|██▋       | 1799/6730 [21:24<1:04:10,  1.28it/s]

Epoch 2 | Step 1800 | loss = 0.431, acc = 0.817


 28%|██▊       | 1899/6730 [22:31<1:02:35,  1.29it/s]

Epoch 2 | Step 1900 | loss = 0.387, acc = 0.822


 30%|██▉       | 1999/6730 [23:38<1:01:35,  1.28it/s]

Epoch 2 | Step 2000 | loss = 0.465, acc = 0.810


 31%|███       | 2099/6730 [24:45<1:00:07,  1.28it/s]

Epoch 2 | Step 2100 | loss = 0.382, acc = 0.822


 33%|███▎      | 2199/6730 [25:52<58:51,  1.28it/s]  

Epoch 2 | Step 2200 | loss = 0.402, acc = 0.782


 34%|███▍      | 2299/6730 [26:59<57:27,  1.29it/s]

Epoch 2 | Step 2300 | loss = 0.416, acc = 0.817


 36%|███▌      | 2399/6730 [28:06<56:07,  1.29it/s]

Epoch 2 | Step 2400 | loss = 0.353, acc = 0.857


 37%|███▋      | 2499/6730 [29:13<55:05,  1.28it/s]

Epoch 2 | Step 2500 | loss = 0.433, acc = 0.817


 39%|███▊      | 2599/6730 [30:20<53:32,  1.29it/s]

Epoch 2 | Step 2600 | loss = 0.372, acc = 0.840


 40%|████      | 2699/6730 [31:27<52:10,  1.29it/s]

Epoch 2 | Step 2700 | loss = 0.295, acc = 0.855


 42%|████▏     | 2799/6730 [32:34<51:05,  1.28it/s]

Epoch 2 | Step 2800 | loss = 0.345, acc = 0.840


 43%|████▎     | 2899/6730 [33:40<49:39,  1.29it/s]

Epoch 2 | Step 2900 | loss = 0.398, acc = 0.832


 45%|████▍     | 2999/6730 [34:47<48:24,  1.28it/s]

Epoch 2 | Step 3000 | loss = 0.347, acc = 0.835


 46%|████▌     | 3099/6730 [35:54<47:28,  1.27it/s]

Epoch 2 | Step 3100 | loss = 0.377, acc = 0.832


 48%|████▊     | 3199/6730 [37:01<45:45,  1.29it/s]

Epoch 2 | Step 3200 | loss = 0.431, acc = 0.830


 49%|████▉     | 3299/6730 [38:08<44:32,  1.28it/s]

Epoch 2 | Step 3300 | loss = 0.341, acc = 0.855


 51%|█████     | 3399/6730 [39:15<43:36,  1.27it/s]

Epoch 2 | Step 3400 | loss = 0.458, acc = 0.830


 52%|█████▏    | 3499/6730 [40:22<41:58,  1.28it/s]

Epoch 2 | Step 3500 | loss = 0.363, acc = 0.832


 53%|█████▎    | 3599/6730 [41:29<40:41,  1.28it/s]

Epoch 2 | Step 3600 | loss = 0.377, acc = 0.842


 55%|█████▍    | 3699/6730 [42:36<39:16,  1.29it/s]

Epoch 2 | Step 3700 | loss = 0.361, acc = 0.845


 56%|█████▋    | 3799/6730 [43:43<38:08,  1.28it/s]

Epoch 2 | Step 3800 | loss = 0.406, acc = 0.855


 58%|█████▊    | 3899/6730 [44:49<36:44,  1.28it/s]

Epoch 2 | Step 3900 | loss = 0.389, acc = 0.835


 59%|█████▉    | 3999/6730 [45:56<35:28,  1.28it/s]

Epoch 2 | Step 4000 | loss = 0.379, acc = 0.820


 61%|██████    | 4099/6730 [47:03<34:12,  1.28it/s]

Epoch 2 | Step 4100 | loss = 0.324, acc = 0.860


 62%|██████▏   | 4199/6730 [48:11<32:53,  1.28it/s]

Epoch 2 | Step 4200 | loss = 0.372, acc = 0.832


 64%|██████▍   | 4299/6730 [49:18<31:43,  1.28it/s]

Epoch 2 | Step 4300 | loss = 0.349, acc = 0.825


 65%|██████▌   | 4399/6730 [50:25<30:29,  1.27it/s]

Epoch 2 | Step 4400 | loss = 0.351, acc = 0.850


 67%|██████▋   | 4499/6730 [51:32<29:05,  1.28it/s]

Epoch 2 | Step 4500 | loss = 0.337, acc = 0.860


 68%|██████▊   | 4599/6730 [52:39<27:39,  1.28it/s]

Epoch 2 | Step 4600 | loss = 0.393, acc = 0.837


 70%|██████▉   | 4699/6730 [53:48<27:36,  1.23it/s]

Epoch 2 | Step 4700 | loss = 0.404, acc = 0.837


 71%|███████▏  | 4799/6730 [54:52<24:42,  1.30it/s]

Epoch 2 | Step 4800 | loss = 0.434, acc = 0.805


 73%|███████▎  | 4899/6730 [55:58<23:27,  1.30it/s]

Epoch 2 | Step 4900 | loss = 0.342, acc = 0.850


 74%|███████▍  | 4999/6730 [57:04<22:15,  1.30it/s]

Epoch 2 | Step 5000 | loss = 0.307, acc = 0.875


 76%|███████▌  | 5099/6730 [58:10<20:51,  1.30it/s]

Epoch 2 | Step 5100 | loss = 0.323, acc = 0.842


 77%|███████▋  | 5199/6730 [59:16<19:37,  1.30it/s]

Epoch 2 | Step 5200 | loss = 0.467, acc = 0.822


 79%|███████▊  | 5299/6730 [1:00:22<18:20,  1.30it/s]

Epoch 2 | Step 5300 | loss = 0.362, acc = 0.847


 80%|████████  | 5399/6730 [1:01:28<17:09,  1.29it/s]

Epoch 2 | Step 5400 | loss = 0.406, acc = 0.840


 82%|████████▏ | 5499/6730 [1:02:35<15:47,  1.30it/s]

Epoch 2 | Step 5500 | loss = 0.443, acc = 0.808


 83%|████████▎ | 5599/6730 [1:03:41<14:31,  1.30it/s]

Epoch 2 | Step 5600 | loss = 0.368, acc = 0.845


 85%|████████▍ | 5699/6730 [1:04:47<13:14,  1.30it/s]

Epoch 2 | Step 5700 | loss = 0.448, acc = 0.817


 86%|████████▌ | 5799/6730 [1:05:53<11:55,  1.30it/s]

Epoch 2 | Step 5800 | loss = 0.302, acc = 0.877


 88%|████████▊ | 5899/6730 [1:06:59<10:37,  1.30it/s]

Epoch 2 | Step 5900 | loss = 0.322, acc = 0.870


 89%|████████▉ | 5999/6730 [1:08:06<09:24,  1.30it/s]

Epoch 2 | Step 6000 | loss = 0.488, acc = 0.790


 91%|█████████ | 6099/6730 [1:09:12<08:08,  1.29it/s]

Epoch 2 | Step 6100 | loss = 0.382, acc = 0.830


 92%|█████████▏| 6199/6730 [1:10:18<06:50,  1.29it/s]

Epoch 2 | Step 6200 | loss = 0.309, acc = 0.832


 94%|█████████▎| 6299/6730 [1:11:24<05:31,  1.30it/s]

Epoch 2 | Step 6300 | loss = 0.316, acc = 0.850


 95%|█████████▌| 6399/6730 [1:12:30<04:14,  1.30it/s]

Epoch 2 | Step 6400 | loss = 0.397, acc = 0.825


 97%|█████████▋| 6499/6730 [1:13:36<02:58,  1.30it/s]

Epoch 2 | Step 6500 | loss = 0.374, acc = 0.832


 98%|█████████▊| 6599/6730 [1:14:43<01:40,  1.30it/s]

Epoch 2 | Step 6600 | loss = 0.384, acc = 0.845


100%|█████████▉| 6699/6730 [1:15:49<00:23,  1.30it/s]

Epoch 2 | Step 6700 | loss = 0.359, acc = 0.852


100%|██████████| 6730/6730 [1:16:09<00:00,  1.47it/s]


Evaluating Dev Set ...


 10%|▉         | 280/2863 [00:41<06:39,  6.47it/s]

原始答案: [UNK]
修正後答案: 鍶
--------------------------------------------------


 31%|███       | 879/2863 [02:12<06:04,  5.45it/s]

原始答案: [UNK]
修正後答案: U
--------------------------------------------------


 33%|███▎      | 952/2863 [02:23<04:48,  6.62it/s]

原始答案: [UNK] 以 下 的 聲 音 對 基 底 膜 的 影 響
修正後答案: 500Hz以下的聲音對基底膜的影響
--------------------------------------------------


 47%|████▋     | 1355/2863 [03:22<03:23,  7.40it/s]

原始答案: 1. [UNK]
修正後答案: 1.5
--------------------------------------------------


 55%|█████▍    | 1573/2863 [03:55<03:42,  5.81it/s]

原始答案: 常 [UNK]
修正後答案: 常璩
--------------------------------------------------


 61%|██████    | 1747/2863 [04:21<02:51,  6.52it/s]

原始答案: [UNK]
修正後答案: I
--------------------------------------------------


 62%|██████▏   | 1771/2863 [04:24<02:37,  6.95it/s]

原始答案: [UNK]
修正後答案: Z
--------------------------------------------------


 67%|██████▋   | 1920/2863 [04:47<02:29,  6.29it/s]

原始答案: [UNK] 東 海
修正後答案: JR東海
--------------------------------------------------


 72%|███████▏  | 2056/2863 [05:07<02:19,  5.79it/s]

原始答案: [UNK]. [UNK]. [UNK] [UNK]
修正後答案: C.L. M
--------------------------------------------------


 72%|███████▏  | 2067/2863 [05:09<01:56,  6.85it/s]

原始答案: [UNK]
修正後答案: 1
--------------------------------------------------


 77%|███████▋  | 2205/2863 [05:29<02:01,  5.43it/s]

原始答案: 落 實 外 交 政 策 ， 歷 史 學 者 [UNK] [UNK]
修正後答案: 落實外交政策，歷史學者 D
--------------------------------------------------


 86%|████████▌ | 2448/2863 [06:04<00:53,  7.77it/s]

原始答案: [UNK]
修正後答案: 鉰
--------------------------------------------------


 88%|████████▊ | 2514/2863 [06:13<00:49,  7.05it/s]

原始答案: [UNK]
修正後答案: 1
--------------------------------------------------


100%|██████████| 2863/2863 [07:05<00:00,  6.73it/s]


Validation | Epoch 2 | acc = 0.904


  1%|▏         | 99/6730 [01:05<1:24:59,  1.30it/s]

Epoch 3 | Step 100 | loss = 0.230, acc = 0.887


  3%|▎         | 199/6730 [02:11<1:24:04,  1.29it/s]

Epoch 3 | Step 200 | loss = 0.233, acc = 0.892


  4%|▍         | 299/6730 [03:18<1:22:59,  1.29it/s]

Epoch 3 | Step 300 | loss = 0.236, acc = 0.902


  6%|▌         | 399/6730 [04:25<1:21:57,  1.29it/s]

Epoch 3 | Step 400 | loss = 0.275, acc = 0.875


  7%|▋         | 499/6730 [05:32<1:21:26,  1.28it/s]

Epoch 3 | Step 500 | loss = 0.202, acc = 0.900


  9%|▉         | 599/6730 [06:39<1:19:14,  1.29it/s]

Epoch 3 | Step 600 | loss = 0.213, acc = 0.873


 10%|█         | 699/6730 [07:46<1:18:58,  1.27it/s]

Epoch 3 | Step 700 | loss = 0.236, acc = 0.885


 12%|█▏        | 799/6730 [08:53<1:16:57,  1.28it/s]

Epoch 3 | Step 800 | loss = 0.207, acc = 0.902


 13%|█▎        | 899/6730 [10:00<1:15:22,  1.29it/s]

Epoch 3 | Step 900 | loss = 0.242, acc = 0.892


 15%|█▍        | 999/6730 [11:07<1:14:58,  1.27it/s]

Epoch 3 | Step 1000 | loss = 0.323, acc = 0.842


 16%|█▋        | 1099/6730 [12:14<1:13:28,  1.28it/s]

Epoch 3 | Step 1100 | loss = 0.223, acc = 0.897


 18%|█▊        | 1199/6730 [13:21<1:12:51,  1.27it/s]

Epoch 3 | Step 1200 | loss = 0.211, acc = 0.902


 19%|█▉        | 1299/6730 [14:28<1:10:22,  1.29it/s]

Epoch 3 | Step 1300 | loss = 0.262, acc = 0.885


 21%|██        | 1399/6730 [15:35<1:08:49,  1.29it/s]

Epoch 3 | Step 1400 | loss = 0.180, acc = 0.900


 22%|██▏       | 1499/6730 [16:42<1:07:59,  1.28it/s]

Epoch 3 | Step 1500 | loss = 0.222, acc = 0.882


 24%|██▍       | 1599/6730 [17:49<1:06:43,  1.28it/s]

Epoch 3 | Step 1600 | loss = 0.257, acc = 0.882


 25%|██▌       | 1699/6730 [18:56<1:05:44,  1.28it/s]

Epoch 3 | Step 1700 | loss = 0.256, acc = 0.860


 27%|██▋       | 1799/6730 [20:03<1:04:29,  1.27it/s]

Epoch 3 | Step 1800 | loss = 0.273, acc = 0.877


 28%|██▊       | 1899/6730 [21:10<1:02:59,  1.28it/s]

Epoch 3 | Step 1900 | loss = 0.187, acc = 0.912


 30%|██▉       | 1999/6730 [22:17<1:01:33,  1.28it/s]

Epoch 3 | Step 2000 | loss = 0.176, acc = 0.912


 31%|███       | 2099/6730 [23:24<1:00:08,  1.28it/s]

Epoch 3 | Step 2100 | loss = 0.252, acc = 0.890


 33%|███▎      | 2199/6730 [24:30<58:59,  1.28it/s]  

Epoch 3 | Step 2200 | loss = 0.273, acc = 0.885


 34%|███▍      | 2299/6730 [25:37<57:52,  1.28it/s]

Epoch 3 | Step 2300 | loss = 0.189, acc = 0.905


 36%|███▌      | 2399/6730 [26:45<56:03,  1.29it/s]

Epoch 3 | Step 2400 | loss = 0.206, acc = 0.895


 37%|███▋      | 2499/6730 [27:52<56:06,  1.26it/s]

Epoch 3 | Step 2500 | loss = 0.323, acc = 0.862


 39%|███▊      | 2599/6730 [29:00<53:56,  1.28it/s]

Epoch 3 | Step 2600 | loss = 0.198, acc = 0.907


 40%|████      | 2699/6730 [30:07<52:35,  1.28it/s]

Epoch 3 | Step 2700 | loss = 0.211, acc = 0.905


 42%|████▏     | 2799/6730 [31:15<50:58,  1.29it/s]

Epoch 3 | Step 2800 | loss = 0.263, acc = 0.873


 43%|████▎     | 2899/6730 [32:22<49:45,  1.28it/s]

Epoch 3 | Step 2900 | loss = 0.204, acc = 0.897


 45%|████▍     | 2999/6730 [33:29<48:36,  1.28it/s]

Epoch 3 | Step 3000 | loss = 0.275, acc = 0.862


 46%|████▌     | 3099/6730 [34:36<47:31,  1.27it/s]

Epoch 3 | Step 3100 | loss = 0.225, acc = 0.900


 48%|████▊     | 3199/6730 [35:43<45:45,  1.29it/s]

Epoch 3 | Step 3200 | loss = 0.296, acc = 0.857


 49%|████▉     | 3299/6730 [36:50<44:36,  1.28it/s]

Epoch 3 | Step 3300 | loss = 0.261, acc = 0.870


 51%|█████     | 3399/6730 [37:58<44:07,  1.26it/s]

Epoch 3 | Step 3400 | loss = 0.205, acc = 0.885


 52%|█████▏    | 3499/6730 [39:05<41:58,  1.28it/s]

Epoch 3 | Step 3500 | loss = 0.235, acc = 0.902


 53%|█████▎    | 3599/6730 [40:12<40:50,  1.28it/s]

Epoch 3 | Step 3600 | loss = 0.221, acc = 0.900


 55%|█████▍    | 3699/6730 [41:19<39:10,  1.29it/s]

Epoch 3 | Step 3700 | loss = 0.226, acc = 0.900


 56%|█████▋    | 3799/6730 [42:26<38:08,  1.28it/s]

Epoch 3 | Step 3800 | loss = 0.215, acc = 0.887


 58%|█████▊    | 3899/6730 [43:33<37:11,  1.27it/s]

Epoch 3 | Step 3900 | loss = 0.264, acc = 0.877


 59%|█████▉    | 3999/6730 [44:40<35:22,  1.29it/s]

Epoch 3 | Step 4000 | loss = 0.211, acc = 0.890


 61%|██████    | 4099/6730 [45:47<34:35,  1.27it/s]

Epoch 3 | Step 4100 | loss = 0.260, acc = 0.885


 62%|██████▏   | 4199/6730 [46:54<33:33,  1.26it/s]

Epoch 3 | Step 4200 | loss = 0.251, acc = 0.892


 64%|██████▍   | 4299/6730 [48:01<31:51,  1.27it/s]

Epoch 3 | Step 4300 | loss = 0.275, acc = 0.865


 65%|██████▌   | 4399/6730 [49:08<30:36,  1.27it/s]

Epoch 3 | Step 4400 | loss = 0.260, acc = 0.875


 67%|██████▋   | 4499/6730 [50:15<28:43,  1.29it/s]

Epoch 3 | Step 4500 | loss = 0.226, acc = 0.897


 68%|██████▊   | 4599/6730 [51:22<27:32,  1.29it/s]

Epoch 3 | Step 4600 | loss = 0.237, acc = 0.877


 70%|██████▉   | 4699/6730 [52:29<26:28,  1.28it/s]

Epoch 3 | Step 4700 | loss = 0.274, acc = 0.870


 71%|███████▏  | 4799/6730 [53:36<25:12,  1.28it/s]

Epoch 3 | Step 4800 | loss = 0.235, acc = 0.890


 73%|███████▎  | 4899/6730 [54:43<24:35,  1.24it/s]

Epoch 3 | Step 4900 | loss = 0.179, acc = 0.907


 74%|███████▍  | 4999/6730 [55:51<22:33,  1.28it/s]

Epoch 3 | Step 5000 | loss = 0.278, acc = 0.877


 76%|███████▌  | 5099/6730 [56:58<21:16,  1.28it/s]

Epoch 3 | Step 5100 | loss = 0.227, acc = 0.895


 77%|███████▋  | 5199/6730 [58:05<19:57,  1.28it/s]

Epoch 3 | Step 5200 | loss = 0.221, acc = 0.910


 79%|███████▊  | 5299/6730 [59:13<18:42,  1.28it/s]

Epoch 3 | Step 5300 | loss = 0.211, acc = 0.877


 80%|████████  | 5399/6730 [1:00:20<17:26,  1.27it/s]

Epoch 3 | Step 5400 | loss = 0.185, acc = 0.890


 82%|████████▏ | 5499/6730 [1:01:27<15:55,  1.29it/s]

Epoch 3 | Step 5500 | loss = 0.211, acc = 0.905


 83%|████████▎ | 5599/6730 [1:02:34<14:40,  1.28it/s]

Epoch 3 | Step 5600 | loss = 0.198, acc = 0.902


 85%|████████▍ | 5699/6730 [1:03:41<13:17,  1.29it/s]

Epoch 3 | Step 5700 | loss = 0.228, acc = 0.897


 86%|████████▌ | 5799/6730 [1:04:48<12:01,  1.29it/s]

Epoch 3 | Step 5800 | loss = 0.200, acc = 0.890


 88%|████████▊ | 5899/6730 [1:05:55<10:50,  1.28it/s]

Epoch 3 | Step 5900 | loss = 0.227, acc = 0.875


 89%|████████▉ | 5999/6730 [1:07:01<09:30,  1.28it/s]

Epoch 3 | Step 6000 | loss = 0.245, acc = 0.885


 91%|█████████ | 6099/6730 [1:08:09<08:16,  1.27it/s]

Epoch 3 | Step 6100 | loss = 0.216, acc = 0.897


 92%|█████████▏| 6199/6730 [1:09:16<06:53,  1.28it/s]

Epoch 3 | Step 6200 | loss = 0.219, acc = 0.887


 94%|█████████▎| 6299/6730 [1:10:23<05:35,  1.28it/s]

Epoch 3 | Step 6300 | loss = 0.181, acc = 0.925


 95%|█████████▌| 6399/6730 [1:11:30<04:16,  1.29it/s]

Epoch 3 | Step 6400 | loss = 0.169, acc = 0.900


 97%|█████████▋| 6499/6730 [1:12:37<03:00,  1.28it/s]

Epoch 3 | Step 6500 | loss = 0.207, acc = 0.900


 98%|█████████▊| 6599/6730 [1:13:44<01:41,  1.29it/s]

Epoch 3 | Step 6600 | loss = 0.209, acc = 0.882


100%|█████████▉| 6699/6730 [1:14:51<00:24,  1.29it/s]

Epoch 3 | Step 6700 | loss = 0.262, acc = 0.892


100%|██████████| 6730/6730 [1:15:11<00:00,  1.49it/s]


Evaluating Dev Set ...


 10%|▉         | 280/2863 [00:42<06:43,  6.40it/s]

原始答案: [UNK]
修正後答案: 鎝
--------------------------------------------------


 31%|███       | 879/2863 [02:13<06:02,  5.47it/s]

原始答案: [UNK]
修正後答案: U
--------------------------------------------------


 33%|███▎      | 952/2863 [02:25<04:53,  6.50it/s]

原始答案: [UNK] 以 下 的 聲 音 對 基 底 膜 的 影 響
修正後答案: 500Hz以下的聲音對基底膜的影響
--------------------------------------------------


 47%|████▋     | 1355/2863 [03:26<03:27,  7.25it/s]

原始答案: 1. [UNK]
修正後答案: 1.5
--------------------------------------------------


 55%|█████▍    | 1573/2863 [03:59<03:44,  5.74it/s]

原始答案: 常 [UNK]
修正後答案: 常璩
--------------------------------------------------


 61%|██████    | 1747/2863 [04:25<02:52,  6.49it/s]

原始答案: [UNK]
修正後答案: I
--------------------------------------------------


 62%|██████▏   | 1771/2863 [04:28<02:38,  6.90it/s]

原始答案: [UNK]
修正後答案: Z
--------------------------------------------------


 67%|██████▋   | 1919/2863 [04:51<02:15,  6.95it/s]

原始答案: [UNK] 東 海
修正後答案: JR東海
--------------------------------------------------


 72%|███████▏  | 2056/2863 [05:12<02:18,  5.81it/s]

原始答案: [UNK]. [UNK]. [UNK] [UNK]
修正後答案: C.L. M
--------------------------------------------------


 72%|███████▏  | 2067/2863 [05:14<01:55,  6.87it/s]

原始答案: [UNK]
修正後答案: 1
--------------------------------------------------


 77%|███████▋  | 2205/2863 [05:34<02:05,  5.26it/s]

原始答案: [UNK] [UNK]
修正後答案:  D
--------------------------------------------------


 86%|████████▌ | 2448/2863 [06:09<00:54,  7.64it/s]

原始答案: [UNK]
修正後答案: 鉰
--------------------------------------------------


 88%|████████▊ | 2514/2863 [06:19<00:50,  6.89it/s]

原始答案: [UNK]
修正後答案: 1
--------------------------------------------------


100%|██████████| 2863/2863 [07:12<00:00,  6.62it/s]


Validation | Epoch 3 | acc = 0.932


  1%|▏         | 99/6730 [01:06<1:26:30,  1.28it/s]

Epoch 4 | Step 100 | loss = 0.133, acc = 0.930


  3%|▎         | 199/6730 [02:13<1:25:18,  1.28it/s]

Epoch 4 | Step 200 | loss = 0.137, acc = 0.930


  4%|▍         | 299/6730 [03:20<1:23:34,  1.28it/s]

Epoch 4 | Step 300 | loss = 0.134, acc = 0.922


  6%|▌         | 399/6730 [04:28<1:22:10,  1.28it/s]

Epoch 4 | Step 400 | loss = 0.164, acc = 0.922


  7%|▋         | 499/6730 [05:35<1:21:09,  1.28it/s]

Epoch 4 | Step 500 | loss = 0.084, acc = 0.957


  9%|▉         | 599/6730 [06:42<1:19:16,  1.29it/s]

Epoch 4 | Step 600 | loss = 0.117, acc = 0.940


 10%|█         | 699/6730 [07:49<1:19:35,  1.26it/s]

Epoch 4 | Step 700 | loss = 0.139, acc = 0.922


 12%|█▏        | 799/6730 [08:57<1:17:46,  1.27it/s]

Epoch 4 | Step 800 | loss = 0.140, acc = 0.933


 13%|█▎        | 899/6730 [10:04<1:15:55,  1.28it/s]

Epoch 4 | Step 900 | loss = 0.165, acc = 0.910


 15%|█▍        | 999/6730 [11:11<1:15:28,  1.27it/s]

Epoch 4 | Step 1000 | loss = 0.132, acc = 0.925


 16%|█▋        | 1099/6730 [12:19<1:14:12,  1.26it/s]

Epoch 4 | Step 1100 | loss = 0.131, acc = 0.930


 18%|█▊        | 1199/6730 [13:26<1:12:14,  1.28it/s]

Epoch 4 | Step 1200 | loss = 0.147, acc = 0.933


 19%|█▉        | 1299/6730 [14:34<1:11:07,  1.27it/s]

Epoch 4 | Step 1300 | loss = 0.169, acc = 0.912


 21%|██        | 1399/6730 [15:41<1:09:22,  1.28it/s]

Epoch 4 | Step 1400 | loss = 0.124, acc = 0.942


 22%|██▏       | 1499/6730 [16:48<1:07:49,  1.29it/s]

Epoch 4 | Step 1500 | loss = 0.120, acc = 0.930


 24%|██▍       | 1599/6730 [17:55<1:06:05,  1.29it/s]

Epoch 4 | Step 1600 | loss = 0.158, acc = 0.930


 25%|██▌       | 1699/6730 [19:02<1:06:02,  1.27it/s]

Epoch 4 | Step 1700 | loss = 0.140, acc = 0.927


 27%|██▋       | 1799/6730 [20:09<1:03:43,  1.29it/s]

Epoch 4 | Step 1800 | loss = 0.116, acc = 0.938


 28%|██▊       | 1899/6730 [21:17<1:02:54,  1.28it/s]

Epoch 4 | Step 1900 | loss = 0.140, acc = 0.917


 30%|██▉       | 1999/6730 [22:24<1:01:55,  1.27it/s]

Epoch 4 | Step 2000 | loss = 0.131, acc = 0.930


 31%|███       | 2099/6730 [23:32<1:00:24,  1.28it/s]

Epoch 4 | Step 2100 | loss = 0.108, acc = 0.955


 33%|███▎      | 2199/6730 [24:39<1:00:05,  1.26it/s]

Epoch 4 | Step 2200 | loss = 0.103, acc = 0.945


 34%|███▍      | 2299/6730 [25:47<58:11,  1.27it/s]  

Epoch 4 | Step 2300 | loss = 0.125, acc = 0.933


 36%|███▌      | 2399/6730 [26:55<56:29,  1.28it/s]

Epoch 4 | Step 2400 | loss = 0.206, acc = 0.920


 37%|███▋      | 2499/6730 [28:02<55:05,  1.28it/s]

Epoch 4 | Step 2500 | loss = 0.123, acc = 0.942


 39%|███▊      | 2599/6730 [29:10<53:48,  1.28it/s]

Epoch 4 | Step 2600 | loss = 0.143, acc = 0.922


 40%|████      | 2699/6730 [30:18<52:39,  1.28it/s]

Epoch 4 | Step 2700 | loss = 0.171, acc = 0.925


 42%|████▏     | 2799/6730 [31:26<51:51,  1.26it/s]

Epoch 4 | Step 2800 | loss = 0.128, acc = 0.945


 43%|████▎     | 2899/6730 [32:33<49:43,  1.28it/s]

Epoch 4 | Step 2900 | loss = 0.169, acc = 0.902


 45%|████▍     | 2999/6730 [33:40<48:38,  1.28it/s]

Epoch 4 | Step 3000 | loss = 0.143, acc = 0.922


 46%|████▌     | 3099/6730 [34:47<47:30,  1.27it/s]

Epoch 4 | Step 3100 | loss = 0.112, acc = 0.938


 48%|████▊     | 3199/6730 [35:54<45:58,  1.28it/s]

Epoch 4 | Step 3200 | loss = 0.173, acc = 0.912


 49%|████▉     | 3299/6730 [37:02<44:51,  1.27it/s]

Epoch 4 | Step 3300 | loss = 0.191, acc = 0.895


 51%|█████     | 3399/6730 [38:10<43:46,  1.27it/s]

Epoch 4 | Step 3400 | loss = 0.121, acc = 0.942


 52%|█████▏    | 3499/6730 [39:18<42:21,  1.27it/s]

Epoch 4 | Step 3500 | loss = 0.213, acc = 0.910


 53%|█████▎    | 3599/6730 [40:26<41:09,  1.27it/s]

Epoch 4 | Step 3600 | loss = 0.127, acc = 0.927


 55%|█████▍    | 3699/6730 [41:34<39:33,  1.28it/s]

Epoch 4 | Step 3700 | loss = 0.092, acc = 0.952


 56%|█████▋    | 3799/6730 [42:41<39:33,  1.23it/s]

Epoch 4 | Step 3800 | loss = 0.115, acc = 0.933


 58%|█████▊    | 3899/6730 [43:49<36:48,  1.28it/s]

Epoch 4 | Step 3900 | loss = 0.156, acc = 0.922


 59%|█████▉    | 3999/6730 [44:57<36:42,  1.24it/s]

Epoch 4 | Step 4000 | loss = 0.168, acc = 0.915


 61%|██████    | 4099/6730 [46:06<35:04,  1.25it/s]

Epoch 4 | Step 4100 | loss = 0.152, acc = 0.935


 62%|██████▏   | 4199/6730 [47:15<33:18,  1.27it/s]

Epoch 4 | Step 4200 | loss = 0.107, acc = 0.950


 64%|██████▍   | 4299/6730 [48:22<31:45,  1.28it/s]

Epoch 4 | Step 4300 | loss = 0.128, acc = 0.930


 65%|██████▌   | 4399/6730 [49:30<30:12,  1.29it/s]

Epoch 4 | Step 4400 | loss = 0.117, acc = 0.938


 67%|██████▋   | 4499/6730 [50:37<29:00,  1.28it/s]

Epoch 4 | Step 4500 | loss = 0.135, acc = 0.938


 68%|██████▊   | 4599/6730 [51:45<28:13,  1.26it/s]

Epoch 4 | Step 4600 | loss = 0.145, acc = 0.925


 70%|██████▉   | 4699/6730 [52:53<27:09,  1.25it/s]

Epoch 4 | Step 4700 | loss = 0.158, acc = 0.935


 71%|███████▏  | 4799/6730 [54:00<25:14,  1.28it/s]

Epoch 4 | Step 4800 | loss = 0.109, acc = 0.940


 73%|███████▎  | 4899/6730 [55:07<23:58,  1.27it/s]

Epoch 4 | Step 4900 | loss = 0.111, acc = 0.925


 74%|███████▍  | 4999/6730 [56:15<22:46,  1.27it/s]

Epoch 4 | Step 5000 | loss = 0.142, acc = 0.922


 76%|███████▌  | 5099/6730 [57:23<21:20,  1.27it/s]

Epoch 4 | Step 5100 | loss = 0.164, acc = 0.920


 77%|███████▋  | 5199/6730 [58:30<20:00,  1.28it/s]

Epoch 4 | Step 5200 | loss = 0.152, acc = 0.922


 79%|███████▊  | 5299/6730 [59:38<18:49,  1.27it/s]

Epoch 4 | Step 5300 | loss = 0.176, acc = 0.938


 80%|████████  | 5399/6730 [1:00:46<17:33,  1.26it/s]

Epoch 4 | Step 5400 | loss = 0.089, acc = 0.952


 82%|████████▏ | 5499/6730 [1:01:53<15:56,  1.29it/s]

Epoch 4 | Step 5500 | loss = 0.143, acc = 0.920


 83%|████████▎ | 5599/6730 [1:03:01<14:42,  1.28it/s]

Epoch 4 | Step 5600 | loss = 0.131, acc = 0.933


 85%|████████▍ | 5699/6730 [1:04:09<13:27,  1.28it/s]

Epoch 4 | Step 5700 | loss = 0.130, acc = 0.925


 86%|████████▌ | 5799/6730 [1:05:17<12:23,  1.25it/s]

Epoch 4 | Step 5800 | loss = 0.167, acc = 0.935


 88%|████████▊ | 5899/6730 [1:06:24<10:52,  1.27it/s]

Epoch 4 | Step 5900 | loss = 0.112, acc = 0.938


 89%|████████▉ | 5999/6730 [1:07:32<09:25,  1.29it/s]

Epoch 4 | Step 6000 | loss = 0.121, acc = 0.942


 91%|█████████ | 6099/6730 [1:08:39<08:14,  1.28it/s]

Epoch 4 | Step 6100 | loss = 0.117, acc = 0.940


 92%|█████████▏| 6199/6730 [1:09:46<06:56,  1.28it/s]

Epoch 4 | Step 6200 | loss = 0.135, acc = 0.938


 94%|█████████▎| 6299/6730 [1:10:53<05:34,  1.29it/s]

Epoch 4 | Step 6300 | loss = 0.135, acc = 0.925


 95%|█████████▌| 6399/6730 [1:12:00<04:20,  1.27it/s]

Epoch 4 | Step 6400 | loss = 0.162, acc = 0.910


 97%|█████████▋| 6499/6730 [1:13:08<03:03,  1.26it/s]

Epoch 4 | Step 6500 | loss = 0.150, acc = 0.935


 98%|█████████▊| 6599/6730 [1:14:15<01:43,  1.27it/s]

Epoch 4 | Step 6600 | loss = 0.133, acc = 0.938


100%|█████████▉| 6699/6730 [1:15:22<00:24,  1.26it/s]

Epoch 4 | Step 6700 | loss = 0.125, acc = 0.925


100%|██████████| 6730/6730 [1:15:43<00:00,  1.48it/s]


Evaluating Dev Set ...


  8%|▊         | 221/2863 [00:33<07:17,  6.04it/s]

原始答案: [UNK]
修正後答案: W
--------------------------------------------------


 10%|▉         | 280/2863 [00:42<06:43,  6.40it/s]

原始答案: [UNK]
修正後答案: 鍶
--------------------------------------------------


 31%|███       | 879/2863 [02:14<06:04,  5.45it/s]

原始答案: [UNK]
修正後答案: U
--------------------------------------------------


 33%|███▎      | 952/2863 [02:26<05:06,  6.24it/s]

原始答案: [UNK] 以 下 的 聲 音 對 基 底 膜 的 影 響
修正後答案: 500Hz以下的聲音對基底膜的影響
--------------------------------------------------


 47%|████▋     | 1355/2863 [03:27<03:26,  7.30it/s]

原始答案: 1. [UNK]
修正後答案: 1.5
--------------------------------------------------


 55%|█████▍    | 1573/2863 [04:00<03:48,  5.65it/s]

原始答案: 常 [UNK]
修正後答案: 常璩
--------------------------------------------------


 61%|██████    | 1747/2863 [04:26<02:56,  6.32it/s]

原始答案: [UNK]
修正後答案: I
--------------------------------------------------


 62%|██████▏   | 1771/2863 [04:30<02:39,  6.83it/s]

原始答案: [UNK]
修正後答案: Z
--------------------------------------------------


 67%|██████▋   | 1919/2863 [04:53<02:12,  7.12it/s]

原始答案: [UNK] 東 海
修正後答案: JR東海
--------------------------------------------------


 72%|███████▏  | 2056/2863 [05:14<02:18,  5.83it/s]

原始答案: [UNK]. [UNK]. [UNK] [UNK]
修正後答案: C.L. M
--------------------------------------------------


 72%|███████▏  | 2067/2863 [05:15<01:55,  6.87it/s]

原始答案: [UNK]
修正後答案: 1
--------------------------------------------------


 77%|███████▋  | 2205/2863 [05:36<02:02,  5.35it/s]

原始答案: 歷 史 學 者 [UNK] [UNK]
修正後答案: 歷史學者 D
--------------------------------------------------


 86%|████████▌ | 2448/2863 [06:11<00:55,  7.50it/s]

原始答案: [UNK]
修正後答案: 鉰
--------------------------------------------------


 88%|████████▊ | 2514/2863 [06:20<00:51,  6.78it/s]

原始答案: [UNK]
修正後答案: 1
--------------------------------------------------


100%|██████████| 2863/2863 [07:14<00:00,  6.59it/s]


Validation | Epoch 4 | acc = 0.938


  1%|▏         | 99/6730 [01:07<1:26:19,  1.28it/s]

Epoch 5 | Step 100 | loss = 0.099, acc = 0.945


  3%|▎         | 199/6730 [02:14<1:26:25,  1.26it/s]

Epoch 5 | Step 200 | loss = 0.086, acc = 0.945


  4%|▍         | 299/6730 [03:22<1:26:05,  1.25it/s]

Epoch 5 | Step 300 | loss = 0.081, acc = 0.950


  6%|▌         | 399/6730 [04:30<1:23:24,  1.26it/s]

Epoch 5 | Step 400 | loss = 0.084, acc = 0.952


  7%|▋         | 499/6730 [05:38<1:21:54,  1.27it/s]

Epoch 5 | Step 500 | loss = 0.098, acc = 0.960


  9%|▉         | 599/6730 [06:46<1:21:35,  1.25it/s]

Epoch 5 | Step 600 | loss = 0.080, acc = 0.950


 10%|█         | 699/6730 [07:54<1:19:35,  1.26it/s]

Epoch 5 | Step 700 | loss = 0.086, acc = 0.955


 12%|█▏        | 799/6730 [09:02<1:18:01,  1.27it/s]

Epoch 5 | Step 800 | loss = 0.075, acc = 0.962


 13%|█▎        | 899/6730 [10:09<1:17:44,  1.25it/s]

Epoch 5 | Step 900 | loss = 0.068, acc = 0.955


 15%|█▍        | 999/6730 [11:17<1:15:29,  1.27it/s]

Epoch 5 | Step 1000 | loss = 0.113, acc = 0.945


 16%|█▋        | 1099/6730 [12:25<1:13:37,  1.27it/s]

Epoch 5 | Step 1100 | loss = 0.148, acc = 0.945


 18%|█▊        | 1199/6730 [13:33<1:12:04,  1.28it/s]

Epoch 5 | Step 1200 | loss = 0.097, acc = 0.947


 19%|█▉        | 1299/6730 [14:40<1:10:36,  1.28it/s]

Epoch 5 | Step 1300 | loss = 0.077, acc = 0.955


 21%|██        | 1399/6730 [15:48<1:10:06,  1.27it/s]

Epoch 5 | Step 1400 | loss = 0.074, acc = 0.950


 22%|██▏       | 1499/6730 [16:55<1:08:27,  1.27it/s]

Epoch 5 | Step 1500 | loss = 0.070, acc = 0.970


 24%|██▍       | 1599/6730 [18:03<1:07:04,  1.27it/s]

Epoch 5 | Step 1600 | loss = 0.106, acc = 0.952


 25%|██▌       | 1699/6730 [19:11<1:05:32,  1.28it/s]

Epoch 5 | Step 1700 | loss = 0.091, acc = 0.950


 27%|██▋       | 1799/6730 [20:18<1:04:15,  1.28it/s]

Epoch 5 | Step 1800 | loss = 0.104, acc = 0.960


 28%|██▊       | 1899/6730 [21:26<1:03:06,  1.28it/s]

Epoch 5 | Step 1900 | loss = 0.089, acc = 0.955


 30%|██▉       | 1999/6730 [22:35<1:09:41,  1.13it/s]

Epoch 5 | Step 2000 | loss = 0.106, acc = 0.942


 31%|███       | 2099/6730 [23:45<1:01:50,  1.25it/s]

Epoch 5 | Step 2100 | loss = 0.116, acc = 0.927


 33%|███▎      | 2199/6730 [24:55<1:00:46,  1.24it/s]

Epoch 5 | Step 2200 | loss = 0.122, acc = 0.945


 34%|███▍      | 2299/6730 [26:06<57:03,  1.29it/s]  

Epoch 5 | Step 2300 | loss = 0.079, acc = 0.955


 36%|███▌      | 2399/6730 [27:17<55:01,  1.31it/s]  

Epoch 5 | Step 2400 | loss = 0.083, acc = 0.940


 37%|███▋      | 2499/6730 [28:25<54:21,  1.30it/s]

Epoch 5 | Step 2500 | loss = 0.127, acc = 0.935


 39%|███▊      | 2599/6730 [29:31<53:08,  1.30it/s]

Epoch 5 | Step 2600 | loss = 0.150, acc = 0.942


 40%|████      | 2699/6730 [30:37<51:52,  1.29it/s]

Epoch 5 | Step 2700 | loss = 0.079, acc = 0.967


 42%|████▏     | 2799/6730 [31:43<50:39,  1.29it/s]

Epoch 5 | Step 2800 | loss = 0.073, acc = 0.962


 43%|████▎     | 2899/6730 [32:50<49:21,  1.29it/s]

Epoch 5 | Step 2900 | loss = 0.084, acc = 0.940


 45%|████▍     | 2999/6730 [33:56<47:54,  1.30it/s]

Epoch 5 | Step 3000 | loss = 0.140, acc = 0.930


 46%|████▌     | 3099/6730 [35:02<46:47,  1.29it/s]

Epoch 5 | Step 3100 | loss = 0.089, acc = 0.955


 48%|████▊     | 3199/6730 [36:08<45:30,  1.29it/s]

Epoch 5 | Step 3200 | loss = 0.084, acc = 0.965


 49%|████▉     | 3299/6730 [37:15<44:10,  1.29it/s]

Epoch 5 | Step 3300 | loss = 0.066, acc = 0.970


 51%|█████     | 3399/6730 [38:21<42:50,  1.30it/s]

Epoch 5 | Step 3400 | loss = 0.076, acc = 0.967


 52%|█████▏    | 3499/6730 [39:27<41:35,  1.29it/s]

Epoch 5 | Step 3500 | loss = 0.065, acc = 0.960


 53%|█████▎    | 3599/6730 [40:33<40:14,  1.30it/s]

Epoch 5 | Step 3600 | loss = 0.077, acc = 0.965


 55%|█████▍    | 3699/6730 [41:40<38:58,  1.30it/s]

Epoch 5 | Step 3700 | loss = 0.077, acc = 0.947


 56%|█████▋    | 3799/6730 [42:46<37:40,  1.30it/s]

Epoch 5 | Step 3800 | loss = 0.089, acc = 0.962


 58%|█████▊    | 3899/6730 [43:52<36:22,  1.30it/s]

Epoch 5 | Step 3900 | loss = 0.104, acc = 0.945


 59%|█████▉    | 3999/6730 [44:58<35:06,  1.30it/s]

Epoch 5 | Step 4000 | loss = 0.136, acc = 0.920


 61%|██████    | 4099/6730 [46:05<33:48,  1.30it/s]

Epoch 5 | Step 4100 | loss = 0.069, acc = 0.955


 62%|██████▏   | 4199/6730 [47:11<32:32,  1.30it/s]

Epoch 5 | Step 4200 | loss = 0.077, acc = 0.952


 64%|██████▍   | 4299/6730 [48:17<31:15,  1.30it/s]

Epoch 5 | Step 4300 | loss = 0.102, acc = 0.940


 65%|██████▌   | 4399/6730 [49:23<29:56,  1.30it/s]

Epoch 5 | Step 4400 | loss = 0.085, acc = 0.955


 67%|██████▋   | 4499/6730 [50:30<28:42,  1.30it/s]

Epoch 5 | Step 4500 | loss = 0.081, acc = 0.960


 68%|██████▊   | 4599/6730 [51:36<27:26,  1.29it/s]

Epoch 5 | Step 4600 | loss = 0.097, acc = 0.960


 70%|██████▉   | 4699/6730 [52:42<26:05,  1.30it/s]

Epoch 5 | Step 4700 | loss = 0.072, acc = 0.950


 71%|███████▏  | 4799/6730 [53:48<24:50,  1.30it/s]

Epoch 5 | Step 4800 | loss = 0.137, acc = 0.930


 73%|███████▎  | 4899/6730 [54:55<23:33,  1.29it/s]

Epoch 5 | Step 4900 | loss = 0.102, acc = 0.952


 74%|███████▍  | 4999/6730 [56:01<22:13,  1.30it/s]

Epoch 5 | Step 5000 | loss = 0.085, acc = 0.952


 76%|███████▌  | 5099/6730 [57:07<20:59,  1.29it/s]

Epoch 5 | Step 5100 | loss = 0.111, acc = 0.935


 77%|███████▋  | 5199/6730 [58:14<19:41,  1.30it/s]

Epoch 5 | Step 5200 | loss = 0.136, acc = 0.933


 79%|███████▊  | 5299/6730 [59:20<18:24,  1.30it/s]

Epoch 5 | Step 5300 | loss = 0.099, acc = 0.945


 80%|████████  | 5399/6730 [1:00:26<17:06,  1.30it/s]

Epoch 5 | Step 5400 | loss = 0.096, acc = 0.947


 82%|████████▏ | 5499/6730 [1:01:32<15:50,  1.30it/s]

Epoch 5 | Step 5500 | loss = 0.101, acc = 0.942


 83%|████████▎ | 5599/6730 [1:02:39<14:31,  1.30it/s]

Epoch 5 | Step 5600 | loss = 0.106, acc = 0.957


 85%|████████▍ | 5699/6730 [1:03:45<13:16,  1.29it/s]

Epoch 5 | Step 5700 | loss = 0.079, acc = 0.957


 86%|████████▌ | 5799/6730 [1:04:51<11:57,  1.30it/s]

Epoch 5 | Step 5800 | loss = 0.106, acc = 0.947


 88%|████████▊ | 5899/6730 [1:05:57<10:40,  1.30it/s]

Epoch 5 | Step 5900 | loss = 0.079, acc = 0.965


 89%|████████▉ | 5999/6730 [1:07:04<09:23,  1.30it/s]

Epoch 5 | Step 6000 | loss = 0.119, acc = 0.942


 91%|█████████ | 6099/6730 [1:08:10<08:05,  1.30it/s]

Epoch 5 | Step 6100 | loss = 0.095, acc = 0.952


 92%|█████████▏| 6199/6730 [1:09:16<06:49,  1.30it/s]

Epoch 5 | Step 6200 | loss = 0.072, acc = 0.952


 94%|█████████▎| 6299/6730 [1:10:22<05:32,  1.30it/s]

Epoch 5 | Step 6300 | loss = 0.073, acc = 0.962


 95%|█████████▌| 6399/6730 [1:11:29<04:15,  1.30it/s]

Epoch 5 | Step 6400 | loss = 0.075, acc = 0.960


 97%|█████████▋| 6499/6730 [1:12:35<02:58,  1.30it/s]

Epoch 5 | Step 6500 | loss = 0.088, acc = 0.940


 98%|█████████▊| 6599/6730 [1:13:41<01:40,  1.30it/s]

Epoch 5 | Step 6600 | loss = 0.068, acc = 0.965


100%|█████████▉| 6699/6730 [1:14:47<00:23,  1.30it/s]

Epoch 5 | Step 6700 | loss = 0.088, acc = 0.952


100%|██████████| 6730/6730 [1:15:07<00:00,  1.49it/s]


Evaluating Dev Set ...


 10%|▉         | 280/2863 [00:41<06:41,  6.43it/s]

原始答案: [UNK]
修正後答案: 鍶
--------------------------------------------------


 31%|███       | 879/2863 [02:12<06:01,  5.49it/s]

原始答案: [UNK]
修正後答案: U
--------------------------------------------------


 33%|███▎      | 952/2863 [02:23<04:51,  6.57it/s]

原始答案: [UNK] 以 下 的 聲 音 對 基 底 膜 的 影 響
修正後答案: 500Hz以下的聲音對基底膜的影響
--------------------------------------------------


 47%|████▋     | 1355/2863 [03:22<03:24,  7.38it/s]

原始答案: 1. [UNK]
修正後答案: 1.5
--------------------------------------------------


 55%|█████▍    | 1573/2863 [03:55<03:43,  5.77it/s]

原始答案: 常 [UNK]
修正後答案: 常璩
--------------------------------------------------


 61%|██████    | 1747/2863 [04:21<02:52,  6.48it/s]

原始答案: [UNK]
修正後答案: I
--------------------------------------------------


 62%|██████▏   | 1771/2863 [04:24<02:38,  6.89it/s]

原始答案: [UNK]
修正後答案: Z
--------------------------------------------------


 67%|██████▋   | 1919/2863 [04:47<02:12,  7.15it/s]

原始答案: [UNK] 東 海
修正後答案: JR東海
--------------------------------------------------


 72%|███████▏  | 2056/2863 [05:07<02:16,  5.90it/s]

原始答案: [UNK]. [UNK]. [UNK] [UNK]
修正後答案: C.L. M
--------------------------------------------------


 72%|███████▏  | 2067/2863 [05:09<01:55,  6.90it/s]

原始答案: [UNK]
修正後答案: 1
--------------------------------------------------


 77%|███████▋  | 2205/2863 [05:29<02:01,  5.44it/s]

原始答案: 歷 史 學 者 [UNK] [UNK]
修正後答案: 歷史學者 D
--------------------------------------------------


 86%|████████▌ | 2448/2863 [06:04<00:53,  7.74it/s]

原始答案: [UNK]
修正後答案: 鉰
--------------------------------------------------


 88%|████████▊ | 2514/2863 [06:13<00:49,  7.00it/s]

原始答案: [UNK]
修正後答案: 1
--------------------------------------------------


100%|██████████| 2863/2863 [07:05<00:00,  6.73it/s]


Validation | Epoch 5 | acc = 0.945
Saving Model ...


## Testing

In [13]:
print("Evaluating Test Set ...")

result = []

model.eval()
with torch.no_grad():
    for data in tqdm(test_loader):
        output = model(input_ids=data[0].squeeze(dim=0).to(device), token_type_ids=data[1].squeeze(dim=0).to(device),
                       attention_mask=data[2].squeeze(dim=0).to(device))
        result.append(evaluate(data, output))

result_file = "testresult5.csv"
with open(result_file, 'w') as f:	
    f.write("ID,Answer\n")
    for i, test_question in enumerate(test_questions):
    # Replace commas in answers with empty strings (since csv is separated by comma)
    # Answers in kaggle are processed in the same way
        f.write(f"{test_question['id']},{result[i].replace(',','')}\n")

print(f"Completed! Result is in {result_file}")

Evaluating Test Set ...


  0%|          | 0/3524 [00:00<?, ?it/s]


TypeError: evaluate() missing 2 required positional arguments: 'paragraph' and 'paragraph_tokenized'