In [1]:
!pip install transformers==4.26.0
!pip install accelerate==0.16.0

Collecting accelerate==0.16.0
  Using cached accelerate-0.16.0-py3-none-any.whl (199 kB)
Installing collected packages: accelerate
  Attempting uninstall: accelerate
    Found existing installation: accelerate 0.2.0
    Uninstalling accelerate-0.2.0:
      Successfully uninstalled accelerate-0.2.0
Successfully installed accelerate-0.16.0


In [2]:
import math
import json
import numpy as np
import random
import torch
from torch.utils.data import DataLoader, Dataset 
from transformers import AdamW, BertForQuestionAnswering, BertTokenizerFast
from transformers import AutoModelForQuestionAnswering, AutoTokenizer
from transformers import get_linear_schedule_with_warmup

from tqdm.auto import tqdm

device = torch.device("cuda", 1) if torch.cuda.is_available() else "cpu"

# Fix random seed for reproducibility
def same_seeds(seed):
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
same_seeds(7414)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Change "fp16_training" to True to support automatic mixed precision training (fp16)
fp16_training = True
fp16 = True
if fp16_training:
    %pip install accelerate==0.2.0
    from accelerate import Accelerator
    accelerator = Accelerator(fp16)
    device = accelerator.device

# Documentation for the toolkit:  https://huggingface.co/docs/accelerate/

Collecting accelerate==0.2.0
  Using cached accelerate-0.2.0-py3-none-any.whl (47 kB)
Installing collected packages: accelerate
  Attempting uninstall: accelerate
    Found existing installation: accelerate 0.16.0
    Uninstalling accelerate-0.16.0:
      Successfully uninstalled accelerate-0.16.0
Successfully installed accelerate-0.2.0
Note: you may need to restart the kernel to use updated packages.


In [4]:
model = AutoModelForQuestionAnswering.from_pretrained("luhua/chinese_pretrain_mrc_macbert_large").to(device)
tokenizer = AutoTokenizer.from_pretrained("luhua/chinese_pretrain_mrc_macbert_large")

# You can safely ignore the warning message (it pops up because new prediction heads for QA are initialized randomly)

In [5]:
def read_data(file):
    with open(file, 'r', encoding="utf-8") as reader:
        data = json.load(reader)
    return data["questions"], data["paragraphs"]

train_questions, train_paragraphs = read_data("./hw7_train.json")
dev_questions, dev_paragraphs = read_data("./hw7_dev.json")
test_questions, test_paragraphs = read_data("./hw7_test.json")

In [6]:
# Tokenize questions and paragraphs separately
# 「add_special_tokens」 is set to False since special tokens will be added when tokenized questions and paragraphs are combined in datset __getitem__ 

train_questions_tokenized = tokenizer([train_question["question_text"] for train_question in train_questions], add_special_tokens=False)
dev_questions_tokenized = tokenizer([dev_question["question_text"] for dev_question in dev_questions], add_special_tokens=False)
test_questions_tokenized = tokenizer([test_question["question_text"] for test_question in test_questions], add_special_tokens=False) 

train_paragraphs_tokenized = tokenizer(train_paragraphs, add_special_tokens=False)
dev_paragraphs_tokenized = tokenizer(dev_paragraphs, add_special_tokens=False)
test_paragraphs_tokenized = tokenizer(test_paragraphs, add_special_tokens=False)

# You can safely ignore the warning message as tokenized sequences will be futher processed in datset __getitem__ before passing to model

In [7]:
DOC_STRIDE = None


In [8]:
class QA_Dataset(Dataset):
    def __init__(self, split, questions, tokenized_questions, tokenized_paragraphs):
        self.split = split
        self.questions = questions
        self.tokenized_questions = tokenized_questions
        self.tokenized_paragraphs = tokenized_paragraphs
        self.max_question_len = 100
        self.max_paragraph_len = 300
        
        ##### TODO: Change value of doc_stride #####
        self.doc_stride = int(0.5 * self.max_paragraph_len)
        ############################################
        global DOC_STRIDE
        DOC_STRIDE = self.doc_stride
        ############################################
        # Input sequence length = [CLS] + question + [SEP] + paragraph + [SEP]
        self.max_seq_len = 1 + self.max_question_len + 1 + self.max_paragraph_len + 1

    def __len__(self):
        return len(self.questions)

    def __getitem__(self, idx):
        question = self.questions[idx]
        tokenized_question = self.tokenized_questions[idx]
        tokenized_paragraph = self.tokenized_paragraphs[question["paragraph_id"]]

        ##### TODO: Preprocessing #####
        # Hint: How to prevent model from learning something it should not learn

        if self.split == "train":
            # Convert answer's start/end positions in paragraph_text to start/end positions in tokenized_paragraph  
            answer_start_token = tokenized_paragraph.char_to_token(question["answer_start"])
            answer_end_token = tokenized_paragraph.char_to_token(question["answer_end"])

#             # A single window is obtained by slicing the portion of paragraph containing the answer
#             mid = (answer_start_token + answer_end_token) // 2
#             paragraph_start = max(0, min(mid - self.max_paragraph_len // 2, len(tokenized_paragraph) - self.max_paragraph_len))
#             paragraph_end = paragraph_start + self.max_paragraph_len
            
            # A single window is obtained by slicing the portion of paragraph containing the answer
            mid = (answer_start_token + answer_end_token) // 2
            prefix_len = int(random.random() * self.max_paragraph_len)
            postfix_len = self.max_paragraph_len - prefix_len
            paragraph_start, paragraph_end = mid - prefix_len, mid + postfix_len
            if paragraph_start < 0:
                paragraph_end -= paragraph_start
                paragraph_start = 0
            if paragraph_end >= len(tokenized_paragraph):
                paragraph_end = len(tokenized_paragraph) - 1
            
            # Slice question/paragraph and add special tokens (101: CLS, 102: SEP)
            input_ids_question = [101] + tokenized_question.ids[:self.max_question_len] + [102] 
            input_ids_paragraph = tokenized_paragraph.ids[paragraph_start : paragraph_end] + [102]
            
            # Convert answer's start/end positions in tokenized_paragraph to start/end positions in the window  
            answer_start_token += len(input_ids_question) - paragraph_start
            answer_end_token += len(input_ids_question) - paragraph_start
            
            # Pad sequence and obtain inputs to model 
            input_ids, token_type_ids, attention_mask = self.padding(input_ids_question, input_ids_paragraph)
            return torch.tensor(input_ids), torch.tensor(token_type_ids), torch.tensor(attention_mask), answer_start_token, answer_end_token

        # Validation/Testing
        else:
            input_ids_list, token_type_ids_list, attention_mask_list = [], [], []
            
            # Paragraph is split into several windows, each with start positions separated by step "doc_stride"
            for i in range(0, len(tokenized_paragraph), self.doc_stride):
                
                # Slice question/paragraph and add special tokens (101: CLS, 102: SEP)
                input_ids_question = [101] + tokenized_question.ids[:self.max_question_len] + [102]
                input_ids_paragraph = tokenized_paragraph.ids[i : i + self.max_paragraph_len] + [102]
                
                # Pad sequence and obtain inputs to model
                input_ids, token_type_ids, attention_mask = self.padding(input_ids_question, input_ids_paragraph)
                
                input_ids_list.append(input_ids)
                token_type_ids_list.append(token_type_ids)
                attention_mask_list.append(attention_mask)
            
            return torch.tensor(input_ids_list), torch.tensor(token_type_ids_list), torch.tensor(attention_mask_list)

    def padding(self, input_ids_question, input_ids_paragraph):
        # Pad zeros if sequence length is shorter than max_seq_len
        padding_len = self.max_seq_len - len(input_ids_question) - len(input_ids_paragraph)
        # Indices of input sequence tokens in the vocabulary
        input_ids = input_ids_question + input_ids_paragraph + [0] * padding_len
        # Segment token indices to indicate first and second portions of the inputs. Indices are selected in [0, 1]
        token_type_ids = [0] * len(input_ids_question) + [1] * len(input_ids_paragraph) + [0] * padding_len
        # Mask to avoid performing attention on padding token indices. Mask values selected in [0, 1]
        attention_mask = [1] * (len(input_ids_question) + len(input_ids_paragraph)) + [0] * padding_len
        
        return input_ids, token_type_ids, attention_mask

train_set = QA_Dataset("train", train_questions, train_questions_tokenized, train_paragraphs_tokenized)
dev_set = QA_Dataset("dev", dev_questions, dev_questions_tokenized, dev_paragraphs_tokenized)
test_set = QA_Dataset("test", test_questions, test_questions_tokenized, test_paragraphs_tokenized)

train_batch_size = 4

# Note: Do NOT change batch size of dev_loader / test_loader !
# Although batch size=1, it is actually a batch consisting of several windows from the same QA pair
train_loader = DataLoader(train_set, batch_size=train_batch_size, shuffle=True, pin_memory=True)
dev_loader = DataLoader(dev_set, batch_size=1, shuffle=False, pin_memory=True)
test_loader = DataLoader(test_set, batch_size=1, shuffle=False, pin_memory=True)

In [9]:
def evaluate(data, output, paragraph, paragraph_tokenized):
    ##### TODO: Postprocessing #####
    # There is a bug and room for improvement in postprocessing 
    # Hint: Open your prediction file to see what is wrong 
    
    answer = ''
    max_prob = float('-inf')
    num_of_windows = data[0].shape[1]
    
    paragraph_start_index = 0
    paragraph_end_index = 0
    
    for k in range(num_of_windows):
        # Obtain answer by choosing the most probable start position / end position
#         start_prob, start_index = torch.max(output.start_logits[k], dim=0)
#         end_prob, end_index = torch.max(output.end_logits[k], dim=0)
        
#         # Probability of answer is calculated as sum of start_prob and end_prob
#         prob = start_prob + end_prob

        mask = (data[1][0][k].bool() & data[2][0][k].bool()).to(device)
    
        masked_output_start = torch.masked_select(output.start_logits[k], mask)
        masked_output_start = masked_output_start[:-1]
        
        start_prob, start_index = torch.max(masked_output_start, dim=0)
        
        masked_output_end = torch.masked_select(output.end_logits[k], mask)
        masked_output_end = masked_output_end[start_index: -1]
        
        end_prob, end_index = torch.max(masked_output_end, dim=0)
        
        end_index += start_index
        
        # Probability of answer is calculated as sum of start_prob and end_prob
        prob = start_prob + end_prob
        masked_data = torch.masked_select(data[0][0][k].to(device), mask)[:-1]
        
        # Replace answer if calculated probability is larger than previous windows
        if (prob > max_prob) and (start_index <= end_index <= (start_index + 50)):
            max_prob = prob
            paragraph_start_index = start_index.item() + (DOC_STRIDE * k)
            paragraph_end_index = end_index.item() + (DOC_STRIDE * k)
            # Convert tokens to chars (e.g. [1920, 7032] --> "大 金")
#             answer = tokenizer.decode(data[0][0][k][start_index : end_index + 1])
            answer = tokenizer.decode(masked_data[start_index : end_index + 1])
            
#     # 轉換 [UNK]        
#     if "[UNK]" in answer:
#         # 原始答案 #
#         print(f"原始答案: {answer}")
#         ##########
#         char_count = 0
#         start_flag = False

#         for i, token in enumerate(paragraph_tokenized):
#             if token in ('[UNK]', '[CLS]', '[SEP]'):
#                 if i == paragraph_start_index:
#                     new_start = char_count
#                 if i == paragraph_end_index:
#                     new_end = char_count
#                 char_count += 1
#             else:
#                 for char in token:
#                     if i == paragraph_start_index and not start_flag:
#                         new_start = char_count
#                         start_flag = True
#                     if i == paragraph_end_index:
#                         new_end = char_count
#                     if char == "#":
#                         continue
#                     else:
#                         while char_count < len(paragraph) and char != paragraph[char_count]:
#                             char_count += 1
#                         char_count += 1
            
#         answer = paragraph[new_start: new_end+1]
#         print(f"修正後答案: {answer}")
#         print(paragraph_start_index, paragraph_end_index)
#         print(new_start, new_end)
#         print("-"*50)

    
    ##########
    char_count = 0
    start_flag = False

    for i, token in enumerate(paragraph_tokenized):
        if token in ('[UNK]', '[CLS]', '[SEP]'):
            if i == paragraph_start_index:
                new_start = char_count
            if i == paragraph_end_index:
                new_end = char_count
            char_count += 1
        else:
            for char in token:
                if i == paragraph_start_index and not start_flag:
                    new_start = char_count
                    start_flag = True
                if i == paragraph_end_index:
                    new_end = char_count
                if char == "#":
                    continue
                else:
                    while char_count < len(paragraph) and char != paragraph[char_count]:
                        char_count += 1
                    char_count += 1
    # 後處理
    # 1.轉換 [UNK]        
    if "[UNK]" in answer:
        # 原始答案 #
        print(f"原始答案: {answer}")
        # 修正後答案 #
        answer = paragraph[new_start: new_end+1]
        print(f"修正後答案: {answer}")
        print("-"*50)

###########################################################
    
#     # 2.把沒抓到的引號補回來
#     # 尾巴引號漏掉
#     if (new_end+1) < len(paragraph):
#         if paragraph[new_start] == "「" and paragraph[new_end+1] == "」":
#             answer = paragraph[new_start: new_end+2]
#     # 頭尾引號都漏掉
#     if new_start > 0 and (new_end+1) < len(paragraph):
#         if paragraph[new_start-1] == "「" and paragraph[new_end+1] == "」":
#             answer = paragraph[new_start-1: new_end+2]
#     # 尾巴隔一個標點符號才接引號
#     if (new_end+2) < len(paragraph):
#         if paragraph[new_start] == "「" and paragraph[new_end+2] == "」":
#             answer = paragraph[new_start: new_end+3]
#     if new_start > 0 and (new_end+2) < len(paragraph):
#         if paragraph[new_start-1] == "「" and paragraph[new_end+2] == "」":
#             answer = paragraph[new_start-1: new_end+3]
    
#     # 3.把「為了」與「因為」抓回來
#     if new_start > 1:
#         if paragraph[new_start-2: new_start] == "為了":
#             answer = paragraph[new_start-2: new_end+1]
#         if paragraph[new_start-2: new_start] == "因為":
#             answer = paragraph[new_start-2: new_end+1]

    
#     if new_start > 0:
#         if paragraph[new_start-1] == "因":
#             answer = paragraph[new_start-1: new_end+1]

###########################################################
        
    # 最後移除空格 (e.g. "大 金" --> "大金")
    answer = answer.replace(' ', '')
    
###########################################################

#     # 把「為了」與「因為」拿掉
#     if len(answer) > 2:
#         if answer[:2] == "為了" or answer[:2] == "因為":
#             answer = answer[2:]
    
#     # 把「」拿掉
#     if len(answer) > 2:
#         if answer[0] == "「" and answer[-1] == "」":
#             answer = answer[1:-1]
    
###########################################################

    if len(answer) > 1:
        if "「" not in answer and answer[-1] == "」":
            answer = answer[:-1]
#     if 3 > len(answer) > 1:
#         if answer[0] == "第":
#             i = 1
#             while i < len(answer) and answer[i] in list("123456789一二三四五六七八九"):
#                 i += 1
#             answer = answer[1:i]
    
    return answer

In [10]:
num_epoch = 3  # 3
validation = True  # True
logging_step = 500
learning_rate = 5e-6
# accum_iter = 8

optimizer = AdamW(model.parameters(), lr=learning_rate)

##### TODO: Apply linear learning rate decay #####
total_steps = len(train_loader) * num_epoch
# warmup_steps = int(0.05 * total_steps)

scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=100, num_training_steps=total_steps)
##################################################

if fp16_training:
    model, optimizer, train_loader = accelerator.prepare(model, optimizer, train_loader) 

model.train()

print("Start Training ...")

for epoch in range(num_epoch):
    step = 1
    train_loss = train_acc = 0
    
    for batch_idx, data in enumerate(tqdm(train_loader)):
        # Load all data into GPU
        data = [i.to(device) for i in data]
        
        # Model inputs: input_ids, token_type_ids, attention_mask, start_positions, end_positions (Note: only "input_ids" is mandatory)
        # Model outputs: start_logits, end_logits, loss (return when start_positions/end_positions are provided)  
        output = model(input_ids=data[0], token_type_ids=data[1], attention_mask=data[2], start_positions=data[3], end_positions=data[4])

        # Choose the most probable start position / end position
        start_index = torch.argmax(output.start_logits, dim=1)
        end_index = torch.argmax(output.end_logits, dim=1)

        # Prediction is correct only if both start_index and end_index are correct
        train_acc += ((start_index == data[3]) & (end_index == data[4])).float().mean()
        train_loss += output.loss

        if fp16_training:
            accelerator.backward(output.loss)
        else:
            output.loss.backward()

        ##### TODO: Apply linear learning rate decay #####
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()
        step += 1
        ##################################################
        
        # Print training loss and accuracy over past logging step
        if step % logging_step == 0:
            print(f"Epoch {epoch + 1} | Step {step} | loss = {train_loss.item() / logging_step:.3f}, acc = {train_acc / logging_step:.3f}")
            train_loss = train_acc = 0
            
    if validation:
        print("Evaluating Dev Set ...")
        model.eval()
        with torch.no_grad():
            dev_acc = 0
            for i, data in enumerate(tqdm(dev_loader)):
                output = model(input_ids=data[0].squeeze(dim=0).to(device), token_type_ids=data[1].squeeze(dim=0).to(device),
                       attention_mask=data[2].squeeze(dim=0).to(device))
                # prediction is correct only if answer text exactly matches
                dev_acc += evaluate(data, output, dev_paragraphs[dev_questions[i]['paragraph_id']], dev_paragraphs_tokenized[dev_questions[i]['paragraph_id']].tokens) == dev_questions[i]["answer_text"]
            print(f"Validation | Epoch {epoch + 1} | acc = {dev_acc / len(dev_loader):.3f}")
        model.train()

# Save a model and its configuration file to the directory 「saved_model」 
# i.e. there are two files under the direcory 「saved_model」: 「pytorch_model.bin」 and 「config.json」
# Saved model can be re-loaded using 「model = BertForQuestionAnswering.from_pretrained("saved_model")」
print("Saving Model ...")
model_save_dir = "./testmodel9" 
model.save_pretrained(model_save_dir)
# part1 - 加速 (有幫助加速) - simple submit
    # 991/991 [05:52<00:00, 3.45it/s]
    # to
    # 991/991 [03:00<00:00, 6.08it/s]

    # Epoch 1 | Step 900 | loss = 0.586, acc = 0.748
    # Validation | Epoch 1 | acc = 0.509
    # to
    # Epoch 1 | Step 900 | loss = 0.576, acc = 0.759
    # Validation | Epoch 1 | acc = 0.538

# part2 - LR Schedule (訓練結果有變好，驗證沒有)
    # Validation | Epoch 1 | acc = 0.539
    
# part3 - doc_stride to 0.5 (變好很多)
    # Validation | Epoch 1 | acc = 0.667

# part4 - random split training answer window (變好很多)
    # Validation | Epoch 1 | acc = 0.733
    
# part5 - Doc Length from 150 to 300 (好一點點)
    # Validation | Epoch 1 | acc = 0.727
    
# part6 - 換模型
    # Validation | Epoch 1 | acc = 0.750



Start Training ...


  7%|▋         | 498/6730 [05:39<1:09:27,  1.50it/s]

Epoch 1 | Step 500 | loss = 0.986, acc = 0.666


 15%|█▍        | 998/6730 [11:23<1:04:33,  1.48it/s]

Epoch 1 | Step 1000 | loss = 0.732, acc = 0.731


 22%|██▏       | 1498/6730 [17:08<59:14,  1.47it/s]  

Epoch 1 | Step 1500 | loss = 0.651, acc = 0.757


 30%|██▉       | 1998/6730 [22:55<57:10,  1.38it/s]  

Epoch 1 | Step 2000 | loss = 0.614, acc = 0.764


 37%|███▋      | 2498/6730 [28:50<49:29,  1.43it/s]  

Epoch 1 | Step 2500 | loss = 0.652, acc = 0.745


 45%|████▍     | 2998/6730 [34:44<43:15,  1.44it/s]  

Epoch 1 | Step 3000 | loss = 0.610, acc = 0.762


 52%|█████▏    | 3498/6730 [40:44<37:04,  1.45it/s]

Epoch 1 | Step 3500 | loss = 0.599, acc = 0.767


 59%|█████▉    | 3998/6730 [46:38<30:27,  1.49it/s]

Epoch 1 | Step 4000 | loss = 0.620, acc = 0.758


 67%|██████▋   | 4498/6730 [52:31<28:34,  1.30it/s]

Epoch 1 | Step 4500 | loss = 0.633, acc = 0.757


 74%|███████▍  | 4998/6730 [58:24<19:49,  1.46it/s]

Epoch 1 | Step 5000 | loss = 0.606, acc = 0.762


 82%|████████▏ | 5498/6730 [1:04:04<14:30,  1.42it/s]

Epoch 1 | Step 5500 | loss = 0.611, acc = 0.768


 89%|████████▉ | 5998/6730 [1:09:44<08:10,  1.49it/s]

Epoch 1 | Step 6000 | loss = 0.569, acc = 0.758


 97%|█████████▋| 6498/6730 [1:15:23<02:39,  1.46it/s]

Epoch 1 | Step 6500 | loss = 0.587, acc = 0.770


100%|██████████| 6730/6730 [1:17:59<00:00,  1.44it/s]


Evaluating Dev Set ...


 10%|▉         | 279/2863 [00:47<07:25,  5.80it/s]

原始答案: [UNK]
修正後答案: 鍶
--------------------------------------------------


 18%|█▊        | 505/2863 [01:26<07:27,  5.27it/s]

原始答案: [UNK]
修正後答案: A
--------------------------------------------------


 31%|███       | 879/2863 [02:31<06:41,  4.95it/s]

原始答案: [UNK]
修正後答案: U
--------------------------------------------------


 47%|████▋     | 1355/2863 [03:52<03:38,  6.91it/s]

原始答案: 1. [UNK]
修正後答案: 1.5
--------------------------------------------------


 55%|█████▍    | 1573/2863 [04:29<04:35,  4.68it/s]

原始答案: 東 晉 常 [UNK]
修正後答案: 東晉常璩
--------------------------------------------------


 62%|██████▏   | 1771/2863 [05:03<03:09,  5.76it/s]

原始答案: [UNK]
修正後答案: Z
--------------------------------------------------


 67%|██████▋   | 1920/2863 [05:29<02:40,  5.87it/s]

原始答案: [UNK] 東 海
修正後答案: JR東海
--------------------------------------------------


 72%|███████▏  | 2055/2863 [05:52<02:27,  5.49it/s]

原始答案: 大 衛 · [UNK] · 沃 爾 普
修正後答案: 大衛·L·沃爾普
--------------------------------------------------


 72%|███████▏  | 2067/2863 [05:54<01:59,  6.65it/s]

原始答案: [UNK]
修正後答案: 1
--------------------------------------------------


 86%|████████▌ | 2448/2863 [06:56<01:01,  6.75it/s]

原始答案: [UNK]
修正後答案: 鉰
--------------------------------------------------


 88%|████████▊ | 2514/2863 [07:07<01:00,  5.80it/s]

原始答案: [UNK]
修正後答案: 1
--------------------------------------------------


100%|██████████| 2863/2863 [08:10<00:00,  5.83it/s]


Validation | Epoch 1 | acc = 0.859


  7%|▋         | 498/6730 [05:56<1:16:22,  1.36it/s]

Epoch 2 | Step 500 | loss = 0.398, acc = 0.826


 15%|█▍        | 998/6730 [12:25<1:15:56,  1.26it/s]

Epoch 2 | Step 1000 | loss = 0.394, acc = 0.826


 22%|██▏       | 1498/6730 [18:56<1:04:39,  1.35it/s]

Epoch 2 | Step 1500 | loss = 0.360, acc = 0.830


 30%|██▉       | 1998/6730 [25:24<1:01:33,  1.28it/s]

Epoch 2 | Step 2000 | loss = 0.398, acc = 0.819


 37%|███▋      | 2498/6730 [31:55<54:55,  1.28it/s]  

Epoch 2 | Step 2500 | loss = 0.401, acc = 0.829


 45%|████▍     | 2998/6730 [38:32<49:43,  1.25it/s]  

Epoch 2 | Step 3000 | loss = 0.340, acc = 0.846


 52%|█████▏    | 3498/6730 [45:06<42:13,  1.28it/s]  

Epoch 2 | Step 3500 | loss = 0.368, acc = 0.843


 59%|█████▉    | 3998/6730 [51:41<35:57,  1.27it/s]

Epoch 2 | Step 4000 | loss = 0.399, acc = 0.826


 67%|██████▋   | 4498/6730 [58:17<29:44,  1.25it/s]

Epoch 2 | Step 4500 | loss = 0.341, acc = 0.847


 74%|███████▍  | 4998/6730 [1:04:51<22:55,  1.26it/s]

Epoch 2 | Step 5000 | loss = 0.372, acc = 0.837


 82%|████████▏ | 5498/6730 [1:11:21<16:02,  1.28it/s]

Epoch 2 | Step 5500 | loss = 0.390, acc = 0.840


 89%|████████▉ | 5998/6730 [1:17:52<09:42,  1.26it/s]

Epoch 2 | Step 6000 | loss = 0.371, acc = 0.839


 97%|█████████▋| 6498/6730 [1:24:20<03:00,  1.28it/s]

Epoch 2 | Step 6500 | loss = 0.347, acc = 0.845


100%|██████████| 6730/6730 [1:27:20<00:00,  1.28it/s]


Evaluating Dev Set ...


 10%|▉         | 279/2863 [00:55<08:46,  4.90it/s]

原始答案: [UNK]
修正後答案: 鍶
--------------------------------------------------


 23%|██▎       | 663/2863 [02:11<07:36,  4.82it/s]

原始答案: 香 港 仔 中 心 [UNK] ; 西 部 的 財 富 花 園 ， 華 富 村 ， 華 貴 村 ， 數 碼 港 及 田 灣 ; 南 部 鴨 [UNK] 洲 ; 石 排 灣 ， 香 港 仔 水 塘
修正後答案: 香港仔中心”;西部的財富花園，華富村，華貴村，數碼港及田灣;南部鴨Cha洲;石排灣，香港仔水塘
--------------------------------------------------


 31%|███       | 879/2863 [02:55<07:46,  4.25it/s]

原始答案: [UNK]
修正後答案: U
--------------------------------------------------


 47%|████▋     | 1355/2863 [04:28<04:14,  5.93it/s]

原始答案: 1. [UNK]
修正後答案: 1.5
--------------------------------------------------


 55%|█████▍    | 1572/2863 [05:11<05:44,  3.75it/s]

原始答案: 常 [UNK]
修正後答案: 常璩
--------------------------------------------------


 62%|██████▏   | 1771/2863 [05:50<03:42,  4.90it/s]

原始答案: [UNK]
修正後答案: Z
--------------------------------------------------


 67%|██████▋   | 1919/2863 [06:20<02:49,  5.57it/s]

原始答案: [UNK] 東 海
修正後答案: JR東海
--------------------------------------------------


 72%|███████▏  | 2055/2863 [06:47<02:55,  4.61it/s]

原始答案: 大 衛 · [UNK] · 沃 爾 普
修正後答案: 大衛·L·沃爾普
--------------------------------------------------


 72%|███████▏  | 2067/2863 [06:50<02:21,  5.62it/s]

原始答案: [UNK]
修正後答案: 1
--------------------------------------------------


 81%|████████  | 2316/2863 [07:36<01:22,  6.63it/s]

原始答案: 孔 子 刪 詩 ： 在 [UNK] 史 記 [UNK] 中 看 ， 據 說 古 詩 三 千 ， 孔 子
修正後答案: 孔子刪詩：在“史記”中看，據說古詩三千，孔子
--------------------------------------------------


 86%|████████▌ | 2448/2863 [08:01<01:08,  6.05it/s]

原始答案: [UNK]
修正後答案: 鉰
--------------------------------------------------


 88%|████████▊ | 2514/2863 [08:13<01:08,  5.11it/s]

原始答案: [UNK]
修正後答案: 1
--------------------------------------------------


100%|██████████| 2863/2863 [09:21<00:00,  5.10it/s]


Validation | Epoch 2 | acc = 0.903


  7%|▋         | 498/6730 [06:29<1:21:23,  1.28it/s]

Epoch 3 | Step 500 | loss = 0.251, acc = 0.878


 15%|█▍        | 998/6730 [12:59<1:07:34,  1.41it/s]

Epoch 3 | Step 1000 | loss = 0.253, acc = 0.882


 22%|██▏       | 1498/6730 [18:44<59:40,  1.46it/s]  

Epoch 3 | Step 1500 | loss = 0.233, acc = 0.892


 30%|██▉       | 1998/6730 [24:26<54:57,  1.44it/s]  

Epoch 3 | Step 2000 | loss = 0.247, acc = 0.879


 37%|███▋      | 2498/6730 [30:08<48:12,  1.46it/s]  

Epoch 3 | Step 2500 | loss = 0.281, acc = 0.871


 45%|████▍     | 2998/6730 [35:50<42:36,  1.46it/s]  

Epoch 3 | Step 3000 | loss = 0.237, acc = 0.890


 52%|█████▏    | 3498/6730 [41:32<36:48,  1.46it/s]

Epoch 3 | Step 3500 | loss = 0.246, acc = 0.885


 59%|█████▉    | 3999/6730 [47:15<38:28,  1.18it/s]

Epoch 3 | Step 4000 | loss = 0.241, acc = 0.877


 67%|██████▋   | 4498/6730 [52:56<25:24,  1.46it/s]

Epoch 3 | Step 4500 | loss = 0.262, acc = 0.884


 74%|███████▍  | 4999/6730 [58:39<24:23,  1.18it/s]

Epoch 3 | Step 5000 | loss = 0.235, acc = 0.889


 82%|████████▏ | 5498/6730 [1:04:19<14:04,  1.46it/s]

Epoch 3 | Step 5500 | loss = 0.214, acc = 0.897


 89%|████████▉ | 5998/6730 [1:10:01<08:19,  1.47it/s]

Epoch 3 | Step 6000 | loss = 0.229, acc = 0.886


 97%|█████████▋| 6498/6730 [1:15:43<02:37,  1.47it/s]

Epoch 3 | Step 6500 | loss = 0.216, acc = 0.896


100%|██████████| 6730/6730 [1:18:22<00:00,  1.43it/s]


Evaluating Dev Set ...


 10%|▉         | 279/2863 [00:48<07:34,  5.68it/s]

原始答案: [UNK]
修正後答案: 鍶
--------------------------------------------------


 23%|██▎       | 663/2863 [01:55<06:32,  5.60it/s]

原始答案: 香 港 仔 中 心 [UNK] ; 西 部 的 財 富 花 園 ， 華 富 村 ， 華 貴 村 ， 數 碼 港 及 田 灣 ; 南 部 鴨 [UNK] 洲 ; 石 排 灣 ， 香 港 仔 水 塘
修正後答案: 香港仔中心”;西部的財富花園，華富村，華貴村，數碼港及田灣;南部鴨Cha洲;石排灣，香港仔水塘
--------------------------------------------------


 31%|███       | 879/2863 [02:33<06:45,  4.90it/s]

原始答案: [UNK]
修正後答案: U
--------------------------------------------------


 33%|███▎      | 952/2863 [02:46<05:06,  6.24it/s]

原始答案: [UNK] 以 下 的 聲 音 對 基 底 膜 的 影 響
修正後答案: 500Hz以下的聲音對基底膜的影響
--------------------------------------------------


 47%|████▋     | 1355/2863 [03:56<03:41,  6.81it/s]

原始答案: 1. [UNK]
修正後答案: 1.5
--------------------------------------------------


 55%|█████▍    | 1573/2863 [04:33<04:38,  4.64it/s]

原始答案: 常 [UNK]
修正後答案: 常璩
--------------------------------------------------


 60%|█████▉    | 1715/2863 [04:58<03:38,  5.26it/s]

原始答案: 晶 體 內 部 的 原 子 排 列 具 有 周 期 性 ， 外 部 具 有 規 則 的 形 狀 ， 例 如 金 剛 石 。 [UNK] [UNK] [UNK] 首 先 提 出 了 晶 體 的 規 則 外 觀 ， 因 為 晶 體
修正後答案: 晶體內部的原子排列具有周期性，外部具有規則的形狀，例如金剛石。Rene Rust Ayuyi首先提出了晶體的規則外觀，因為晶體
--------------------------------------------------


 61%|██████    | 1746/2863 [05:03<02:56,  6.32it/s]

原始答案: [UNK]
修正後答案: I
--------------------------------------------------


 62%|██████▏   | 1771/2863 [05:07<03:13,  5.64it/s]

原始答案: [UNK]
修正後答案: Z
--------------------------------------------------


 67%|██████▋   | 1920/2863 [05:33<02:42,  5.80it/s]

原始答案: [UNK] 東 海
修正後答案: JR東海
--------------------------------------------------


 72%|███████▏  | 2056/2863 [05:57<02:33,  5.24it/s]

原始答案: 大 衛 · [UNK] · 沃 爾 普
修正後答案: 大衛·L·沃爾普
--------------------------------------------------


 72%|███████▏  | 2067/2863 [05:59<02:02,  6.52it/s]

原始答案: [UNK]
修正後答案: 1
--------------------------------------------------


 86%|████████▌ | 2448/2863 [07:02<00:58,  7.05it/s]

原始答案: [UNK]
修正後答案: 鉰
--------------------------------------------------


 88%|████████▊ | 2514/2863 [07:12<01:01,  5.71it/s]

原始答案: [UNK]
修正後答案: 1
--------------------------------------------------


100%|██████████| 2863/2863 [08:12<00:00,  5.81it/s]


Validation | Epoch 3 | acc = 0.914
Saving Model ...


In [11]:
print("Evaluating Test Set ...")

result = []

model.eval()
with torch.no_grad():
    for i, data in enumerate(tqdm(test_loader)):
        output = model(input_ids=data[0].squeeze(dim=0).to(device), token_type_ids=data[1].squeeze(dim=0).to(device),
                       attention_mask=data[2].squeeze(dim=0).to(device))
        result.append(evaluate(data, output, test_paragraphs[test_questions[i]['paragraph_id']], test_paragraphs_tokenized[test_questions[i]['paragraph_id']].tokens))

result_file = "./testresult9.csv"
with open(result_file, 'w') as f:
    f.write("ID,Answer\n")
    for i, test_question in enumerate(test_questions):
        # Replace commas in answers with empty strings (since csv is separated by comma)
        # Answers in kaggle are processed in the same way
        f.write(f"{test_question['id']},{result[i].replace(',','')}\n")

print(f"Completed! Result is in {result_file}")

Evaluating Test Set ...


  2%|▏         | 81/3524 [00:14<08:40,  6.62it/s]

原始答案: 回 [UNK] 部 落
修正後答案: 回鶻部落
--------------------------------------------------


  3%|▎         | 97/3524 [00:17<11:33,  4.94it/s]

原始答案: [UNK]
修正後答案: M
--------------------------------------------------


  5%|▌         | 184/3524 [00:33<10:38,  5.23it/s]

原始答案: 回 [UNK] 汗 國
修正後答案: 回鶻汗國
--------------------------------------------------


  6%|▌         | 217/3524 [00:38<08:48,  6.26it/s]

原始答案: 白 [UNK] 紀 中 期
修正後答案: 白堊紀中期
--------------------------------------------------


  7%|▋         | 244/3524 [00:44<10:56,  5.00it/s]

原始答案: [UNK] 大 壩
修正後答案: Jawa大壩
--------------------------------------------------


 10%|▉         | 342/3524 [01:02<08:10,  6.49it/s]

原始答案: [UNK]
修正後答案: A
--------------------------------------------------


 13%|█▎        | 475/3524 [01:26<10:26,  4.87it/s]

原始答案: [UNK]
修正後答案: K
--------------------------------------------------


 16%|█▌        | 560/3524 [01:41<08:27,  5.84it/s]

原始答案: 李 [UNK]
修正後答案: 李勣
--------------------------------------------------


 17%|█▋        | 598/3524 [01:48<07:06,  6.87it/s]

原始答案: 約 為 太 陽 的 100 [UNK] 150 倍
修正後答案: 約為太陽的100–150倍
--------------------------------------------------


 27%|██▋       | 963/3524 [02:56<05:53,  7.24it/s]

原始答案: [UNK]
修正後答案: A
--------------------------------------------------


 28%|██▊       | 974/3524 [02:58<08:06,  5.25it/s]

原始答案: 因 周 王 是 燕 王 同 母 兄 弟 ， 而 朱 允 [UNK] 怕 他 與 燕 王 呵 成 一 氣
修正後答案: 因周王是燕王同母兄弟，而朱允炆怕他與燕王呵成一氣
--------------------------------------------------


 28%|██▊       | 999/3524 [03:02<07:10,  5.87it/s]

原始答案: 久 彌 宮 妃 [UNK] 子
修正後答案: 久彌宮妃俔子
--------------------------------------------------


 29%|██▊       | 1007/3524 [03:04<07:17,  5.76it/s]

原始答案: 《 阿 [UNK] 婆 吠 陀 》
修正後答案: 《阿闥婆吠陀》
--------------------------------------------------


 31%|███       | 1079/3524 [03:17<05:53,  6.92it/s]

原始答案: 姚 [UNK]
修正後答案: 姚萇
--------------------------------------------------


 33%|███▎      | 1152/3524 [03:30<07:26,  5.31it/s]

原始答案: [UNK] - 850 型
修正後答案: TR-850型
--------------------------------------------------


 34%|███▍      | 1200/3524 [03:38<06:48,  5.69it/s]

原始答案: [UNK] [UNK] 自 動 步 槍 系 列
修正後答案: HK G3自動步槍系列
--------------------------------------------------


 35%|███▍      | 1229/3524 [03:43<06:50,  5.59it/s]

原始答案: 導 入 人 工 智 慧 化 的 新 世 代 [UNK] 系 統
修正後答案: 導入人工智慧化的新世代BRT系統
--------------------------------------------------


 37%|███▋      | 1298/3524 [03:55<06:19,  5.87it/s]

原始答案: [UNK] 水 之 戰
修正後答案: 淝水之戰
--------------------------------------------------


 39%|███▉      | 1385/3524 [04:10<05:19,  6.69it/s]

原始答案: 金 [UNK]
修正後答案: 金堉
--------------------------------------------------


 39%|███▉      | 1388/3524 [04:10<06:00,  5.93it/s]

原始答案: 《 [UNK] 早 餐 》
修正後答案: 《BBC早餐》
--------------------------------------------------


 41%|████      | 1438/3524 [04:19<04:51,  7.16it/s]

原始答案: 白 [UNK] 紀
修正後答案: 白堊紀
--------------------------------------------------


 44%|████▍     | 1546/3524 [04:38<05:50,  5.65it/s]

原始答案: 測 試 [UNK] [UNK] 村 附 近 男 性 居 民 的 脫 氧 核 糖 核 酸
修正後答案: 測試驪靬村附近男性居民的脫氧核糖核酸
--------------------------------------------------


 57%|█████▋    | 2017/3524 [06:04<05:11,  4.84it/s]

原始答案: 本 田 [UNK] 型 機 動 腳 踏 車
修正後答案: 本田A型機動腳踏車
--------------------------------------------------


 61%|██████    | 2134/3524 [06:26<03:57,  5.86it/s]

原始答案: 美 國 長 [UNK] 米
修正後答案: 美國長秈米
--------------------------------------------------


 68%|██████▊   | 2391/3524 [07:12<03:13,  5.85it/s]

原始答案: [UNK] 族
修正後答案: 撣族
--------------------------------------------------


 70%|███████   | 2482/3524 [07:28<03:20,  5.19it/s]

原始答案: [UNK] 娛 樂
修正後答案: LOEN娛樂
--------------------------------------------------


 72%|███████▏  | 2554/3524 [07:40<02:49,  5.71it/s]

原始答案: 恭 親 王 奕 [UNK]
修正後答案: 恭親王奕訢
--------------------------------------------------


 75%|███████▌  | 2647/3524 [07:57<02:44,  5.33it/s]

原始答案: 蔡 [UNK]
修正後答案: 蔡鍔
--------------------------------------------------


 76%|███████▋  | 2690/3524 [08:05<02:07,  6.52it/s]

原始答案: [UNK] - 61 戰 鬥 機
修正後答案: P-61戰鬥機
--------------------------------------------------


 82%|████████▏ | 2906/3524 [08:43<01:39,  6.24it/s]

原始答案: 白 [UNK] 紀
修正後答案: 白堊紀
--------------------------------------------------


 83%|████████▎ | 2913/3524 [08:44<02:01,  5.01it/s]

原始答案: 「 [UNK] 獎 」
修正後答案: 「Harvey獎」
--------------------------------------------------


 85%|████████▌ | 3011/3524 [09:01<01:39,  5.13it/s]

原始答案: 大 肚 平 埔 族 拍 布 拉 族 大 肚 王 與 瑯 [UNK] 番 人 的 反 抗
修正後答案: 大肚平埔族拍布拉族大肚王與瑯嶠番人的反抗
--------------------------------------------------


 89%|████████▉ | 3131/3524 [09:22<01:11,  5.50it/s]

原始答案: 《 阿 [UNK] 婆 吠 陀 》
修正後答案: 《阿闥婆吠陀》
--------------------------------------------------


 92%|█████████▏| 3237/3524 [09:41<00:49,  5.81it/s]

原始答案: 朱 允 [UNK]
修正後答案: 朱允炆
--------------------------------------------------


 93%|█████████▎| 3287/3524 [09:51<00:47,  4.98it/s]

原始答案: [UNK] - 16 戰 鬥 機
修正後答案: F-16戰鬥機
--------------------------------------------------


 95%|█████████▌| 3353/3524 [10:02<00:27,  6.13it/s]

原始答案: 盤 [UNK] 蠻
修正後答案: 盤瓠蠻
--------------------------------------------------


 97%|█████████▋| 3408/3524 [10:12<00:18,  6.15it/s]

原始答案: [UNK] 靼
修正後答案: 韃靼
--------------------------------------------------


 98%|█████████▊| 3440/3524 [10:18<00:12,  6.49it/s]

原始答案: [UNK]. [UNK]. [UNK]. [UNK]
修正後答案: T.C.B.S
--------------------------------------------------


 98%|█████████▊| 3444/3524 [10:18<00:12,  6.17it/s]

原始答案: 3000 [UNK] 顆
修正後答案: 3000垓顆
--------------------------------------------------


 99%|█████████▊| 3477/3524 [10:24<00:06,  6.78it/s]

原始答案: 趙 [UNK]
修正後答案: 趙竑
--------------------------------------------------


 99%|█████████▉| 3480/3524 [10:25<00:07,  5.79it/s]

原始答案: 512 [UNK]
修正後答案: 512 
--------------------------------------------------


100%|██████████| 3524/3524 [10:32<00:00,  5.57it/s]

Completed! Result is in ./testresult9.csv



