# Import Packages

In [1]:
!nvidia-smi

Wed Jun  7 02:19:18 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.182.03   Driver Version: 470.182.03   CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:02:00.0 Off |                  N/A |
| 76%   74C    P2   147W / 260W |   9502MiB / 11019MiB |     98%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [2]:
# !pip install torchviz

In [3]:
import my_utils

Matplotlib created a temporary config/cache directory at /tmp/matplotlib-dco8htsj because the default path (/home/emma/.config/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


In [4]:
from tqdm import tqdm

In [5]:
import numpy as np
import pandas as pd
import seaborn as sns

In [6]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, Dataset

In [7]:
from transformers import BertTokenizer, BertModel
from transformers import get_linear_schedule_with_warmup

In [8]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [9]:
from AutomaticWeightedLoss import AutomaticWeightedLoss

In [10]:
import warnings
# Install a blanket "ignore" filter: every warning category from every module
# is suppressed for the rest of the kernel session.
# NOTE(review): this presumably exists to hide metric/library warnings during
# training; it will also hide deprecation and numerical warnings — confirm
# that is intended.
warnings.filterwarnings("ignore")

In [None]:
from sklearn.model_selection import GroupKFold

In [12]:
# Fix the random seed through the project helper, using its default arguments.
# NOTE(review): which libraries are seeded (random / numpy / torch / CUDA) and
# the default seed value are defined in my_utils — confirm there.
my_utils.set_seed()

# Global Variables

In [11]:
## Global Variables

# DEVICE: first CUDA GPU when one is visible, otherwise CPU, so the notebook
# can still be executed (slowly) on a machine without a GPU.
DEVICE = torch.device("cuda", 0) if torch.cuda.is_available() else torch.device("cpu")

# DataFrame file paths
TRAIN_DF_FILE_PATH = "./data/train_split.csv"
VALID_DF_FILE_PATH = "./data/valid_split.csv"
TEST_DF_FILE_PATH = "./data/test_split.csv"

# Scratch files rewritten for every cross-validation fold
CROSS_VALIDATION_TRAIN_DF_FILE_PATH = "./data/cross_validation_train_split.csv"
CROSS_VALIDATION_TEST_DF_FILE_PATH = "./data/cross_validation_test_split.csv"

# Suffix identifying this run; it is appended to every result file name so the
# text report and the saved tensors of one run stay together.
TASK_NUM = "_test3"
RESULT_FILE_PATH = f"./cross_validation_results/cross_validation_result{TASK_NUM}.txt"

# Pre-trained checkpoint used for both the tokenizer and the encoder
PRETRAINED_MODEL_NAME = "IDEA-CCNL/Erlangshen-Roberta-110M-Sentiment"

# BERT Tokenizer
TOKENIZER = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)

# BERT Model (only used to pre-compute dialogue embeddings)
BERT = BertModel.from_pretrained(PRETRAINED_MODEL_NAME)
BERT.to(DEVICE)

# Model Settings
# Learning rate, batch size, task list and the multi-task weighting
# hyper-parameters (alpha, tau) are supplied per experiment in the
# "Cross Validation" section instead of being fixed here.
EPOCHS = 300
PATIENCE = 50  # early stopping: tolerated number of epochs without a better validation F1

LABEL_COLUMN = "Sentiment"

# Classes of the main task; also determines the width of the main output head.
LABEL_MAPPING = {"neutral": 0, "negative": 1, "positive": 2}

# String -> id mapping handed to my_utils.get_dialogues_df for every label
# column. It starts with the entries of LABEL_MAPPING, followed by 19 labels
# numbered 0..18 (matching the 19-way heads of the model), the 'low'/'high'
# levels and the two speaker names.
# NOTE(review): the grouping into dialogue-act / Big-Five / speaker labels is
# inferred from the key names — confirm against my_utils.
MAPPING = {**LABEL_MAPPING,
           'apology': 0, 'quotation': 1, 'acknowledge': 2,
           'thanking': 3, 'question': 4, 'disagreement': 5,
           'greeting': 6, 'reject': 7, 'conventional-closing': 8,
           'interjection': 9, 'agreement': 10, 'answer': 11,
           'comfort': 12, 'command': 13, 'irony': 14,
           'statement-non-opinion': 15, 'statement-opinion': 16, 'appreciation': 17,
           'other': 18, 'low': 0.0, 'high': 1.0, '語者一': 0, '語者二': 1}

# Checkpoint file overwritten whenever validation F1 improves during training
MODEL_PATH = "./cross_validation_model.ckpt"

# Define Functions

In [27]:
class Dataset(torch.utils.data.Dataset):
    """Dialogue dataset pairing pre-computed embeddings with all task labels.

    The base class is referenced by its fully qualified name on purpose: this
    class is itself called ``Dataset``, so writing ``class Dataset(Dataset)``
    only works the first time the cell runs. On a re-run the name already
    points at this class and it would inherit from its own previous
    definition.

    Args:
        df: DataFrame with a ``labels`` column whose cells are dicts holding
            the keys ``main_label``, ``future_DA_label``, ``sentiment_labels``,
            ``DA_labels``, ``big_five_labels`` (itself a dict keyed by the two
            speaker names) and ``speaking_order``.
        embeddings: tensor indexed by dialogue along its first dimension; its
            first dimension must equal the number of rows in ``df``.
    """

    def __init__(self, df, embeddings):
        self.embeddings = embeddings
        # One tensor per label type, all indexed by dialogue along dim 0.
        self.main_labels = torch.tensor(df.labels.apply(lambda x: x["main_label"]))
        self.future_DA_labels = torch.tensor(df.labels.apply(lambda x: x["future_DA_label"]))
        self.sentiment_labels = torch.tensor(df.labels.apply(lambda x: x["sentiment_labels"]))
        self.DA_labels = torch.tensor(df.labels.apply(lambda x: x["DA_labels"]))
        self.speaker1_labels = torch.tensor(df.labels.apply(lambda x: x["big_five_labels"]["語者一"]))
        self.speaker2_labels = torch.tensor(df.labels.apply(lambda x: x["big_five_labels"]["語者二"]))
        self.speaking_order = torch.tensor(df.labels.apply(lambda x: x["speaking_order"]))

    def __len__(self):
        """Return the number of dialogues; asserts labels and embeddings agree."""
        assert self.main_labels.shape[0] == self.embeddings.shape[0]
        return self.main_labels.shape[0]

    def __getitem__(self, idx):
        """Return the 8-tuple consumed by the training / evaluation loops.

        Order: embeddings, main label, future-DA label, sentiment labels,
        DA labels, speaker-1 labels, speaker-2 labels, speaking order.
        """
        return (
            self.embeddings[idx],
            self.main_labels[idx],
            self.future_DA_labels[idx],
            self.sentiment_labels[idx],
            self.DA_labels[idx],
            self.speaker1_labels[idx],
            self.speaker2_labels[idx],
            self.speaking_order[idx],
        )

In [30]:
class Model(nn.Module):
    """Multi-task classifier over a sequence of pre-computed utterance embeddings.

    Pipeline (all sizes are fixed in ``__init__``):
      1. add a learned speaker embedding to every 768-d utterance embedding;
      2. self-attention over the sequence, LayerNorm, Linear 768 -> 128, ReLU;
      3. per-utterance heads: sentiment (3 classes) and DA (19 classes);
      4. per-speaker heads: the 128-d states of each speaker are averaged over
         the sequence and passed through a shared 5-output ``big_five_module``;
      5. the last position attends over the whole sequence; the result feeds
         the future-DA head (19 classes) and the main head.

    Attribute names and the order in which layers are created are kept stable:
    the names are the ``state_dict`` keys of saved checkpoints and the
    creation order determines parameter initialisation under a fixed seed.

    Args:
        num_main_classes: number of classes of the main head. Defaults to
            ``len(LABEL_MAPPING)`` when omitted.
    """

    def __init__(self, num_main_classes=None):
        super().__init__()
        if num_main_classes is None:
            num_main_classes = len(LABEL_MAPPING)
        self.speaker_emb = nn.Embedding(2, 768)  # two speakers, ids 0 and 1
        self.contextual_attention_layer = nn.MultiheadAttention(768, num_heads=8, batch_first=True, dropout=0.0)
        self.layer_norm = nn.LayerNorm(768)
        self.fc = nn.Linear(768, 128)
        self.relu_1 = nn.ReLU()
        self.last_attention_layer = nn.MultiheadAttention(128, num_heads=4, batch_first=True, dropout=0.0)
        self.relu_2 = nn.ReLU()
        # Heads stay wrapped in nn.Sequential so parameter keys remain
        # "<head>.0.weight" / "<head>.0.bias".
        self.main_module = nn.Sequential(
            nn.Linear(128, num_main_classes),
        )
        self.future_DA_module = nn.Sequential(
            nn.Linear(128, 19),
        )
        self.sentiment_module = nn.Sequential(
            nn.Linear(128, 3),
        )
        self.DA_module = nn.Sequential(
            nn.Linear(128, 19),
        )
        self.big_five_module = nn.Sequential(
            nn.Linear(128, 5),
        )

    @staticmethod
    def _speaker_mean(hidden, speaking_order, speaker_id):
        """Average ``hidden`` over the positions spoken by ``speaker_id``.

        Args:
            hidden: (batch_size, seq_length, hidden_dim) states.
            speaking_order: (batch_size, seq_length) speaker id per position.
            speaker_id: the speaker to pool over.

        Returns:
            (batch_size, hidden_dim) tensor. A dialogue in which the speaker
            never talks yields a zero vector instead of the NaN that a mean
            over an empty selection would produce.
        """
        mask = (speaking_order == speaker_id).unsqueeze(-1).to(hidden.dtype)
        counts = mask.sum(dim=1).clamp(min=1.0)  # avoid 0 / 0 for absent speakers
        return (hidden * mask).sum(dim=1) / counts

    def forward(self, embeddings, speaking_order):
        """Run all heads.

        Args:
            embeddings: (batch_size, seq_length, 768) utterance embeddings.
            speaking_order: (batch_size, seq_length) integer tensor of speaker
                ids in {0, 1}.

        Returns:
            Tuple ``(main_output, future_DA_output, sentiment_output,
            DA_output, speaker1_output, speaker2_output)`` with shapes
            (batch, num_main_classes), (batch, 19), (batch, seq, 3),
            (batch, seq, 19), (batch, 5) and (batch, 5).
        """
        embeddings = embeddings + self.speaker_emb(speaking_order)
        output, _ = self.contextual_attention_layer(embeddings, embeddings, embeddings)  # (batch, seq, 768)
        output = self.layer_norm(output)
        output = self.fc(output)
        output = self.relu_1(output)  # (batch, seq, 128)

        # Per-utterance heads
        sentiment_output = self.sentiment_module(output)  # (batch, seq, 3)
        DA_output = self.DA_module(output)  # (batch, seq, 19)

        # Per-speaker heads: mean over each speaker's positions. Computed on
        # whatever device `output` lives on, not on a global device.
        speaker1_output = self.big_five_module(self._speaker_mean(output, speaking_order, 0))  # (batch, 5)
        speaker2_output = self.big_five_module(self._speaker_mean(output, speaking_order, 1))  # (batch, 5)

        # The last position queries the whole sequence.
        output, _ = self.last_attention_layer(output[:, -1:, :], output, output)  # (batch, 1, 128)
        output = output.squeeze(1)
        output = self.relu_2(output)

        future_DA_output = self.future_DA_module(output)  # (batch, 19)
        main_output = self.main_module(output)  # (batch, num_main_classes)
        return main_output, future_DA_output, sentiment_output, DA_output, speaker1_output, speaker2_output

In [32]:
def _forward_batch(model, batch):
    """Move one DataLoader batch to DEVICE, run the model and compute the losses.

    Returns:
        ``(main_labels, main_output, loss_dict)`` where ``loss_dict`` is the
        dict returned by ``my_utils.multi_task_loss``.
    """
    (embeddings, main_labels, future_DA_labels, sentiment_labels, DA_labels,
     speaker1_labels, speaker2_labels, speaking_order) = [tensor.to(DEVICE) for tensor in batch]

    main_output, future_DA_output, sentiment_output, DA_output, speaker1_output, speaker2_output = model(embeddings, speaking_order)

    # Sequence outputs and labels are flattened to (batch * seq, classes) / (batch * seq,).
    loss_dict = my_utils.multi_task_loss(
        main_output,
        future_DA_output,
        sentiment_output.view(-1, 3),
        DA_output.view(-1, 19),
        speaker1_output,
        speaker2_output,
        main_labels,
        future_DA_labels,
        sentiment_labels.view(-1),
        DA_labels.view(-1),
        speaker1_labels,
        speaker2_labels,
    )
    return main_labels, main_output, loss_dict


def _dynamic_weighted_loss(loss_dict, first_batch_losses, tasks, alpha, t, t_tau):
    """Combine the main loss and the auxiliary task losses with step-dependent weights.

    During the first ``t_tau`` batches of an epoch the main-loss weight grows
    linearly from 0 to 1 while each auxiliary weight is
    ``(t_tau - t) / t_tau * (current_loss / first_batch_loss) ** alpha``.
    From batch ``t_tau`` on the main weight is 1 and every auxiliary weight 0.
    ``t_tau == 0`` means "no warm-up" and no longer divides by zero.
    """
    in_warmup = t < t_tau
    losses = [loss_dict["main_loss"]]
    weights = [t / t_tau if in_warmup else 1.0]
    for task_name in tasks:
        if task_name == "main_loss":
            continue
        task_loss = loss_dict[task_name]
        if in_warmup:
            # Guard against a first-batch loss of exactly zero.
            ratio = task_loss.item() / max(first_batch_losses[task_name], 1e-12)
            lambda_ = ((t_tau - t) / t_tau) * ratio ** alpha
        else:
            lambda_ = 0.0
        losses.append(task_loss)
        weights.append(lambda_)
    stacked_losses = torch.stack(losses)
    weights_tensor = torch.tensor(weights, dtype=stacked_losses.dtype, device=stacked_losses.device)
    return torch.sum(stacked_losses * weights_tensor)


def _accumulate_losses(running_totals, loss_dict):
    """Add every loss of ``loss_dict`` (as a Python float) to ``running_totals`` in place."""
    for loss_type, loss_value in loss_dict.items():
        running_totals[loss_type] = running_totals.get(loss_type, 0.0) + loss_value.item()


def _append_epoch_losses(history, epoch_losses):
    """Append this epoch's mean losses to the per-loss history lists."""
    for loss_type, loss_value in epoch_losses.items():
        history.setdefault(loss_type, []).append(loss_value)


def _macro_scores(y_true, y_pred):
    """Return macro-averaged (precision, recall, f1) of the main task."""
    return (
        precision_score(y_true, y_pred, average='macro'),
        recall_score(y_true, y_pred, average='macro'),
        f1_score(y_true, y_pred, average='macro'),
    )


def train(lr_model, batch_size, tasks, alpha, tau, train_df, valid_df, train_embedding, valid_embedding, verbose=True):
    """Train a fresh ``Model`` with early stopping on validation macro-F1.

    Args:
        lr_model: Adam learning rate (peak value of the linear warm-up/decay schedule).
        batch_size: batch size of both the training and the validation loader.
        tasks: list of keys of the loss dict to train on. With a single entry
            the ``"total_loss"`` returned by ``my_utils.multi_task_loss`` is
            optimised as is; with several entries the losses are combined by
            ``_dynamic_weighted_loss``.
            NOTE(review): what ``"total_loss"`` contains is defined in
            my_utils — confirm it matches the single-task intent.
        alpha: exponent applied to the loss ratio of auxiliary tasks.
        tau: fraction of the batches of one epoch used as weighting warm-up
            (``t_tau = int(len(train_dataloader) * tau)``).
            NOTE(review): ``t`` is the batch index inside the current epoch,
            so the weighting schedule restarts every epoch — confirm intended.
        train_df, valid_df: DataFrames accepted by ``Dataset``.
        train_embedding, valid_embedding: embeddings matching the DataFrames.
        verbose: show progress bars and per-epoch logs.

    Returns:
        ``(model, train_precision_list, valid_precision_list, train_recall_list,
        valid_recall_list, train_f1_list, valid_f1_list, train_loss_dict,
        valid_loss_dict)``. ``model`` holds the weights of the last executed
        epoch; the best weights (by validation F1) are saved to ``MODEL_PATH``.
    """

    # Training Record
    train_loss_dict = {}
    train_precision_list = []
    train_recall_list = []
    train_f1_list = []

    # Validation Record
    valid_loss_dict = {}
    valid_precision_list = []
    valid_recall_list = []
    valid_f1_list = []

    # Training and Valid Dataset / DataLoader
    train_dataset = Dataset(df=train_df, embeddings=train_embedding)
    valid_dataset = Dataset(df=valid_df, embeddings=valid_embedding)
    train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    valid_dataloader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False)

    # Model
    model = Model()
    model = model.to(DEVICE)

    # Optimizer
    optimizer_model = torch.optim.Adam(model.parameters(), lr=lr_model)

    # LR Scheduler: linear warm-up over the first 3% of all steps, then linear decay
    total_steps = len(train_dataloader) * EPOCHS
    scheduler = get_linear_schedule_with_warmup(optimizer_model, num_warmup_steps=int(total_steps*0.03), num_training_steps=total_steps)

    # Dynamic Loss Weight: number of batches per epoch with non-zero auxiliary weights
    t_tau = int(len(train_dataloader) * tau)
    single_task = len(tasks) == 1

    # Early Stopping
    trigger_times = 0
    best_f1 = float("-inf")

    # Training with epochs
    for epoch_num in range(EPOCHS):

        # Set the state of the model to "Training"
        model.train()

        # To save the training result
        train_loss = {}
        train_true = torch.tensor([])
        train_pred = torch.tensor([])

        # Auxiliary losses of the first batch of this epoch, stored as floats
        first_batch_losses = None

        # Training with batches
        for t, batch in enumerate(tqdm(train_dataloader, disable=(not verbose))):

            optimizer_model.zero_grad()

            train_main_labels, main_output, loss_dict = _forward_batch(model, batch)

            if single_task:
                loss = loss_dict["total_loss"]
            else:
                if t == 0:
                    first_batch_losses = {
                        task_name: loss_dict[task_name].item()
                        for task_name in tasks if task_name != "main_loss"
                    }
                loss = _dynamic_weighted_loss(loss_dict, first_batch_losses, tasks, alpha, t, t_tau)
                # The reported training "total_loss" is the weighted sum that is optimised.
                loss_dict["total_loss"] = loss

            loss.backward()

            optimizer_model.step()
            scheduler.step()

            # Get the results and save them
            _accumulate_losses(train_loss, loss_dict)
            train_true = torch.cat([train_true, train_main_labels.cpu()])
            train_pred = torch.cat([train_pred, torch.argmax(main_output, dim=-1).cpu()])

        # Calculate Metrics
        train_precision, train_recall, train_f1 = _macro_scores(train_true, train_pred)
        train_loss = {loss_type: loss_value / len(train_dataloader) for loss_type, loss_value in train_loss.items()}

        train_precision_list.append(train_precision)
        train_recall_list.append(train_recall)
        train_f1_list.append(train_f1)
        _append_epoch_losses(train_loss_dict, train_loss)

        # ===============================================================================

        # Set the state of the model to "Evaluation"
        model.eval()

        # To save the validation result
        valid_loss = {}
        valid_true = torch.tensor([])
        valid_pred = torch.tensor([])

        # Validation
        with torch.no_grad():

            # Validation with batches
            for batch in tqdm(valid_dataloader, disable=(not verbose)):

                valid_main_labels, main_output, loss_dict = _forward_batch(model, batch)

                # The validation "total_loss" is reported exactly as returned
                # by my_utils.multi_task_loss (no dynamic weighting).
                _accumulate_losses(valid_loss, loss_dict)
                valid_true = torch.cat([valid_true, valid_main_labels.cpu()])
                valid_pred = torch.cat([valid_pred, torch.argmax(main_output, dim=-1).cpu()])

        # Calculate Metrics
        valid_precision, valid_recall, valid_f1 = _macro_scores(valid_true, valid_pred)
        valid_loss = {loss_type: loss_value / len(valid_dataloader) for loss_type, loss_value in valid_loss.items()}

        valid_precision_list.append(valid_precision)
        valid_recall_list.append(valid_recall)
        valid_f1_list.append(valid_f1)
        _append_epoch_losses(valid_loss_dict, valid_loss)

        if verbose:
            # Print the result of each epoch
            print(
                f"Epochs: {epoch_num + 1} \
                | Train Loss: {train_loss['total_loss']: .3f} | Train F1: {train_f1: .3f} | Train Precision: {train_precision: .3f} | Train Recall: {train_recall: .3f} \
                | Valid Loss: {valid_loss['total_loss']: .3f} | Valid F1: {valid_f1: .3f} | Valid Precision: {valid_precision: .3f} | Valid Recall: {valid_recall: .3f}")

        # Early Stopping
        if valid_f1 <= best_f1:
            trigger_times += 1
            if verbose:
                print('Trigger times:', trigger_times)

            if trigger_times > PATIENCE:
                if verbose:
                    print('Early stopping! Start the test process.')
                break
        else:
            if verbose:
                print('Trigger times: 0')
            trigger_times = 0
            best_f1 = valid_f1
            # Keep only the best checkpoint on disk
            torch.save(model.state_dict(), MODEL_PATH)

    return model, train_precision_list, valid_precision_list, train_recall_list, valid_recall_list, \
            train_f1_list, valid_f1_list, train_loss_dict, valid_loss_dict

In [41]:
def test(model, test_df, test_embedding, batch_size=32):
    """Evaluate ``model`` on the main task and plot the report / confusion matrix.

    Args:
        model: a ``Model`` instance; it is moved to ``DEVICE`` and put in eval mode.
        test_df: DataFrame accepted by ``Dataset``.
        test_embedding: embeddings matching ``test_df``.
        batch_size: evaluation batch size (default 32, the previously fixed value).

    Returns:
        ``(test_accuracy, test_precision, test_recall, test_f1, test_true,
        test_pred)``; precision, recall and F1 are macro-averaged, and
        ``test_true`` / ``test_pred`` are CPU tensors over all test samples.
    """

    # Test Dataset / DataLoader
    test_dataset = Dataset(df=test_df, embeddings=test_embedding)
    test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    # Run the model on DEVICE in evaluation mode
    model = model.to(DEVICE)
    model.eval()

    # To save the test result
    test_true = torch.tensor([])
    test_pred = torch.tensor([])

    # Test
    with torch.no_grad():

        # Test with batches
        for test_embeddings, test_main_labels, _, _, _, _, _, test_speaking_order in tqdm(test_dataloader):

            # Feed the data into the model
            test_embeddings = test_embeddings.to(DEVICE)
            test_speaking_order = test_speaking_order.to(DEVICE)
            main_output, _, _, _, _, _ = model(test_embeddings, test_speaking_order)

            # main_output is already (batch, num_classes). It must NOT be
            # squeezed: with a final batch of one sample, squeezing drops the
            # batch dimension, argmax then returns a 0-d tensor and the
            # concatenation below raises.
            batch_pred = torch.argmax(main_output, dim=-1).cpu()

            # Save the result of each batch (labels never left the CPU)
            test_true = torch.cat([test_true, test_main_labels])
            test_pred = torch.cat([test_pred, batch_pred])

    # Calculate Metrics
    test_accuracy = accuracy_score(test_true, test_pred)
    test_precision = precision_score(test_true, test_pred, average='macro')
    test_recall = recall_score(test_true, test_pred, average='macro')
    test_f1 = f1_score(test_true, test_pred, average='macro')

    print(f"test_accuracy: {round(test_accuracy * 100, 2)}%")
    print(f"test_precision: {round(test_precision * 100, 2)}%")
    print(f"test_recall: {round(test_recall * 100, 2)}%")
    print(f"test_f1: {round(test_f1 * 100, 2)}%")

    # Draw the result
    my_utils.plot_report(test_true, test_pred, list(LABEL_MAPPING.keys()))
    my_utils.plot_confusion_matrix(test_true, test_pred, list(LABEL_MAPPING.values()), list(LABEL_MAPPING.keys()))

    return test_accuracy, test_precision, test_recall, test_f1, test_true, test_pred

In [15]:
def cross_validation(tasks, tau, alpha, lr_model, batch_size, file_name):
    """Run 5-fold, TV_ID-grouped cross validation for one experiment setting.

    The original train and test CSVs are concatenated and re-split with
    ``GroupKFold`` so that no ``TV_ID`` appears in both the training and the
    test part of a fold. The validation set is always the fixed file at
    ``VALID_DF_FILE_PATH``. For every fold a model is trained, the best
    checkpoint is reloaded from ``MODEL_PATH`` and evaluated on the held-out
    part. Predictions of all folds are pooled for the final metrics.

    Args:
        tasks, tau, alpha, lr_model, batch_size: forwarded to ``train``.
        file_name: prefix of the two tensor files written to
            ``./cross_validation_results/``.

    Side effects:
        Appends a report to ``RESULT_FILE_PATH``, overwrites the two
        cross-validation CSV scratch files and ``MODEL_PATH`` for every fold,
        saves the pooled true / predicted label tensors, prints and plots.
    """
    import os  # local import: keeps this cell independent of the import cells

    def _append_to_result_file(lines):
        """Append each entry of ``lines`` as one line to RESULT_FILE_PATH."""
        with open(RESULT_FILE_PATH, "a") as file:
            for line in lines:
                file.write(f"{line}\n")

    my_utils.set_seed()

    # File names
    true_tensor_file_path = f"./cross_validation_results/{file_name}_cross_validation_true_tensor.pt"
    pred_tensor_file_path = f"./cross_validation_results/{file_name}_cross_validation_pred_tensor.pt"

    # Make sure every output directory exists before hours of training start.
    for output_path in (RESULT_FILE_PATH, true_tensor_file_path, pred_tensor_file_path):
        os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)

    # Read Original Dataframe
    train_df_original = pd.read_csv(TRAIN_DF_FILE_PATH)
    test_df_original = pd.read_csv(TEST_DF_FILE_PATH)
    train_test_df = pd.concat([train_df_original, test_df_original], axis=0)

    # K-fold Cross Validation (Split by TV_ID)
    k_fold = GroupKFold(n_splits=5)
    cross_validation_test_true, cross_validation_test_pred = torch.tensor([]), torch.tensor([])

    # The same header goes to the console and to the report file.
    header_lines = [
        "==============================================",
        f"Cross Validation on Tasks: {tasks}",
        "----------------------------------------------",
        f"Tau = {tau}; Alpha = {alpha}; LR = {lr_model}; Batch-Size = {batch_size}",
        "----------------------------------------------",
    ]
    for line in header_lines:
        print(line)
    _append_to_result_file(header_lines)

    for split, (train_idx, test_idx) in enumerate(k_fold.split(train_test_df.index, groups=train_test_df.TV_ID)):

        # Re-seed so every fold starts from the same random state
        my_utils.set_seed()

        train_df_split = train_test_df.iloc[train_idx.tolist()]
        test_df_split = train_test_df.iloc[test_idx.tolist()]
        # The fold is round-tripped through CSV because my_utils.load_df reads from a path.
        train_df_split.to_csv(CROSS_VALIDATION_TRAIN_DF_FILE_PATH, index=False)
        test_df_split.to_csv(CROSS_VALIDATION_TEST_DF_FILE_PATH, index=False)
        # No TV_ID may be shared between the two parts of a fold
        assert len(set(train_df_split.TV_ID.unique()) & set(test_df_split.TV_ID.unique())) == 0

        train_df = my_utils.load_df(CROSS_VALIDATION_TRAIN_DF_FILE_PATH)
        valid_df = my_utils.load_df(VALID_DF_FILE_PATH)
        test_df = my_utils.load_df(CROSS_VALIDATION_TEST_DF_FILE_PATH)

        train_df, train_mapping = my_utils.get_dialogues_df(train_df, LABEL_COLUMN, MAPPING, MAPPING)
        valid_df, valid_mapping = my_utils.get_dialogues_df(valid_df, LABEL_COLUMN, MAPPING, MAPPING)
        test_df, test_mapping = my_utils.get_dialogues_df(test_df, LABEL_COLUMN, MAPPING, MAPPING)

        # Assert the mappings between all dataset are the same
        assert MAPPING == train_mapping == valid_mapping == test_mapping

        # Make sure every training context has the same length (8)
        assert min([len(x) for x in train_df.context]) == max([len(x) for x in train_df.context]) == 8

        print("Train Data's Label Distribution:")
        print(train_df.labels.apply(lambda x: x["main_label"]).value_counts())
        print("=" * 35)
        print("Valid Data's Label Distribution:")
        print(valid_df.labels.apply(lambda x: x["main_label"]).value_counts())
        print("=" * 35)
        print("Test Data's Label Distribution:")
        print(test_df.labels.apply(lambda x: x["main_label"]).value_counts())
        print("=" * 35)

        print(f"Split: {split+1}")
        print(f"Total data points: {train_df_split.shape[0] + test_df_split.shape[0]}, Each Set: {train_df_split.shape[0]} / {test_df_split.shape[0]}")

        # Embeddings are recomputed for every fold.
        # NOTE(review): valid_df does not depend on the fold, so its embedding
        # could probably be computed once outside the loop — left in place
        # because the helper's use of random state is not visible here.
        train_embedding = my_utils.get_dialogues_embedding(train_df, TOKENIZER, BERT, DEVICE)
        valid_embedding = my_utils.get_dialogues_embedding(valid_df, TOKENIZER, BERT, DEVICE)
        test_embedding = my_utils.get_dialogues_embedding(test_df, TOKENIZER, BERT, DEVICE)

        # train() saves the best checkpoint to MODEL_PATH; its return value
        # (last-epoch model and histories) is not needed here.
        train(tasks=tasks, tau=tau, alpha=alpha, lr_model=lr_model, batch_size=batch_size, train_df=train_df, valid_df=valid_df, train_embedding=train_embedding, valid_embedding=valid_embedding, verbose=False)

        # Evaluate the best checkpoint, not the last epoch. map_location makes
        # the load independent of the device the checkpoint was saved from.
        model = Model()
        model.load_state_dict(torch.load(MODEL_PATH, map_location=DEVICE))

        test_accuracy, test_precision, test_recall, test_f1, test_true, test_pred = test(model, test_df, test_embedding)

        _append_to_result_file([f"Split {split+1}: test_f1 = {round(test_f1 * 100, 2)}%"])

        cross_validation_test_true = torch.cat([cross_validation_test_true, test_true])
        cross_validation_test_pred = torch.cat([cross_validation_test_pred, test_pred])

    # Show and save Final Result (metrics over the pooled predictions of all folds)
    test_accuracy = accuracy_score(cross_validation_test_true, cross_validation_test_pred)
    test_precision = precision_score(cross_validation_test_true, cross_validation_test_pred, average='macro')
    test_recall = recall_score(cross_validation_test_true, cross_validation_test_pred, average='macro')
    test_f1 = f1_score(cross_validation_test_true, cross_validation_test_pred, average='macro')

    print(f"test_accuracy: {round(test_accuracy * 100, 2)}%")
    print(f"test_precision: {round(test_precision * 100, 2)}%")
    print(f"test_recall: {round(test_recall * 100, 2)}%")
    print(f"test_f1: {round(test_f1 * 100, 2)}%")

    # Draw the result
    my_utils.plot_report(cross_validation_test_true, cross_validation_test_pred, list(LABEL_MAPPING.keys()))
    my_utils.plot_confusion_matrix(cross_validation_test_true, cross_validation_test_pred, list(LABEL_MAPPING.values()), list(LABEL_MAPPING.keys()))

    _append_to_result_file([
        "----------------------------------------------",
        f"Final test_accuracy: {round(test_accuracy * 100, 2)}%",
        f"Final test_precision: {round(test_precision * 100, 2)}%",
        f"Final test_recall: {round(test_recall * 100, 2)}%",
        f"Final test_f1: {round(test_f1 * 100, 2)}%",
        "==============================================",
        "",  # blank line separating experiments in the report
    ])

    torch.save(cross_validation_test_true, true_tensor_file_path)
    torch.save(cross_validation_test_pred, pred_tensor_file_path)


The shape of the Dataset: (94187, 18)
The number of dialogues: 8086
   TV_ID Dialogue_ID Utterance_ID Speaker  Gender          Age Neuroticism  \
0      1      01_000   01_000_000     童文洁  female  middle-aged        high   
1      1      01_000   01_000_001     童文洁  female  middle-aged        high   
2      1      01_000   01_000_002      刘静  female  middle-aged         low   
3      1      01_000   01_000_003     童文洁  female  middle-aged        high   
4      1      01_001   01_001_000      刘静  female  middle-aged         low   

  Extraversion Openness Agreeableness Conscientiousness        Scene  \
0         high      low           low              high  other-venue   
1         high      low           low              high  other-venue   
2         high     high          high              high  other-venue   
3         high      low           low              high  other-venue   
4         high     high          high              high  other-venue   

  FacePosition_LU FacePosition

# Cross Validation

In [None]:
import os


def _make_experiment(tasks, file_name, tau=1.0, alpha=0.5, lr_model=5e-5, batch_size=32):
    """Build the keyword arguments of one ``cross_validation`` call.

    Args:
        tasks: loss names to train on; the first entry is always "main_loss".
            Available auxiliary names: "future_DA_loss", "sentiment_loss", "DA_loss".
        file_name: result-file prefix; ``TASK_NUM`` is appended to it.
        tau, alpha: dynamic loss-weighting hyper-parameters.
        lr_model, batch_size: optimisation hyper-parameters.
    """
    return {
        "tasks": list(tasks),
        "tau": tau,
        "alpha": alpha,
        "lr_model": lr_model,
        "batch_size": batch_size,
        "file_name": f"{file_name}{TASK_NUM}",
    }


# Start a fresh report file (mode "w" discards the report of a previous run);
# create its directory first so a clean checkout does not fail here.
os.makedirs(os.path.dirname(RESULT_FILE_PATH) or ".", exist_ok=True)
with open(RESULT_FILE_PATH, "w") as file:
    file.write(f"==============================================\n")

# Single-task baseline: no auxiliary losses, so tau and alpha are set to 0.
experiment_1 = _make_experiment(["main_loss"], "single_task", tau=0.0, alpha=0.0)

# One auxiliary task
experiment_2 = _make_experiment(["main_loss", "DA_loss"], "da(seq)")
experiment_3 = _make_experiment(["main_loss", "sentiment_loss"], "sentiment(seq)")
experiment_4 = _make_experiment(["main_loss", "future_DA_loss"], "da(future)")

# Two auxiliary tasks
experiment_5 = _make_experiment(["main_loss", "future_DA_loss", "sentiment_loss"], "da(future)_sentiment(seq)")
experiment_6 = _make_experiment(["main_loss", "future_DA_loss", "DA_loss"], "da(future)_da(seq)")
experiment_7 = _make_experiment(["main_loss", "DA_loss", "sentiment_loss"], "da(seq)_sentiment(seq)")

# All three auxiliary tasks
experiment_8 = _make_experiment(["main_loss", "future_DA_loss", "DA_loss", "sentiment_loss"], "da(future)_da(seq)_sentiment(seq)")

# Run the experiments in order; each appends its report to RESULT_FILE_PATH.
for experiment in (experiment_1, experiment_2, experiment_3, experiment_4,
                   experiment_5, experiment_6, experiment_7, experiment_8):
    cross_validation(**experiment)