In [1]:
# Mount My Google Drive files
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
# Install required modules
!pip install transformers
!pip install torch
!pip install openpyxl
!pip3 install tqdm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.21.1-py3-none-any.whl (4.7 MB)
[K     |████████████████████████████████| 4.7 MB 4.2 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.8.1-py3-none-any.whl (101 kB)
[K     |████████████████████████████████| 101 kB 12.8 MB/s 
Collecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 56.9 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 82.3 MB/s 
Installing collected packages: pyyaml, tokenizers, huggingface-hub, transformers
  Attempting uninstall: pyyaml
    Found existing installation: PyYAML 3.13
    Uninstalling P

In [3]:
# Monitor and manage the GPU's on the system
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Thu Aug 18 23:26:13 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   38C    P0    27W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [4]:
# Import libraries
import pandas as pd
pd.options.mode.chained_assignment = None 
import numpy as np
import random
from sklearn.metrics import f1_score
import torch
from torch.utils.data import DataLoader, TensorDataset, RandomSampler, SequentialSampler
from sklearn.model_selection import train_test_split
from transformers import BertForSequenceClassification,BertTokenizer,BertModel,AutoModel, AutoTokenizer,AdamW, get_linear_schedule_with_warmup
from tqdm.notebook import tqdm
import altair as alt
alt.renderers.enable('default')
import warnings
warnings.filterwarnings('ignore')
no_deprecation_warning=True

In [5]:
# Set value for MAX_LEN, and RANDOM_SEED
MAX_LEN = 768
RANDOM_SEED = 42
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
torch.cuda.manual_seed_all(RANDOM_SEED)

# Set current device
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cuda', index=0)

In [6]:
# Read reviews label dataset
train_review_df = pd.read_excel(f'drive/MyDrive/Realtime Dreamer/train reviews.xlsx',
                                engine='openpyxl',
                                sheet_name ='train', 
                                skiprows=0)
train_review_df = train_review_df.rename(columns={'Review Content':'Vietnamese'})
print(train_review_df.shape)
train_review_df_before_oversample = train_review_df.copy()

print('Count of individual class before oversamping negative class:')
print(train_review_df['emotion'].value_counts())

(6018, 4)
Count of individual class before oversamping negative class:
positive    5250
negative     499
neutral      269
Name: emotion, dtype: int64


In [24]:
train_review_df_count_df = pd.DataFrame(train_review_df_before_oversample['emotion'].value_counts())
train_review_df_count_df.reset_index(inplace=True)
train_review_df_count_df = train_review_df_count_df.rename(columns={'index': 'emotion', 'emotion':'count_emotion'})
train_review_df_count_df['percent'] = round(train_review_df_count_df['count_emotion']/train_review_df.shape[0] *100,0)
train_review_df_count_df['percent1'] = train_review_df_count_df['percent'].astype(int).astype(str)
train_review_df_count_df['percent2'] = train_review_df_count_df['percent'].astype(int).astype(str) + '%'
train_review_df_count_df

Unnamed: 0,emotion,count_emotion,percent,percent1,percent2
0,positive,5250,87.0,87,87%
1,negative,499,8.0,8,8%
2,neutral,269,4.0,4,4%


In [31]:
base = alt.Chart(train_review_df_count_df).mark_bar(size=20).encode(
            x = alt.X('emotion:N', title=""),
            y = alt.Y("count_emotion:Q", title="", axis=None),
            color = alt.Color("emotion:N", legend=None), 
            tooltip = ['emotion', 'count_emotion']
            ).properties(width=200,height=100)
            

text = base.mark_text(size=12, dx=0, dy=-10).encode(text='count_emotion:Q')
                    
text2 = base.mark_text(size=12, dx=0, dy=-25).encode(text='percent2:N')
title = alt.Chart({"values": [{"text": "Imbalanced Emotion Classes"}]}
                      ).mark_text(size=15, dx=0, dy=0, color="black"
                      ).encode(text='text:N'
                      ).properties(width=200, height=30)


label_graph = (title & (base + text + text2)
              ).configure_axis(grid=False
              ).configure_view(strokeWidth=0
              ).configure_axis(labelFontSize = 12,
                               titleFontSize = 12)

label_graph

In [None]:
# There are imbalance classes in the review label dataset. Oversample the negative class data to handle imblance class issue
k_neg = len(train_review_df[train_review_df['emotion']=='positive'])\
        -len(train_review_df[train_review_df['emotion']=='negative'])

new_index_neg = random.choices(train_review_df[train_review_df['emotion']=='negative']['index'].values, 
                             k=k_neg)
df_add_neg = pd.DataFrame(new_index_neg,columns=['index'])

# Merge addtional negative class data generated by oversampling to original reviews label dataset
df_add_neg_combined = pd.merge(df_add_neg,train_review_df,
                             how='left',
                             on=['index'])
train_review_df = train_review_df.append(df_add_neg_combined).reset_index()
train_review_df.drop(['level_0'], axis=1, inplace=True)
print('\nCount of individual class after oversamping negative class:')
print(train_review_df['emotion'].value_counts())

In [None]:
 # Create dictionary for class labels
label_dict = {'positive': 2, 'neutral': 1, 'negative': 0}
label_dict_inverse = {v: k for k, v in label_dict.items()}


def add_label_to_df(df):

    """Create function add_label_to_df to add label to reviews label dataset."""  


    df['label'] = df['emotion'].replace(label_dict)
    df = df[['index','Vietnamese','emotion','label']]
    return df


def data_split(df):

    """
    Create function data_split to 
    1. splict review lable dataset into train and validation data. 
    2. stratify the data to handle imbalance class issue.
    """


    df=add_label_to_df(df)
    X_train, X_val, y_train, y_val = train_test_split(df.index.values,
                                                      df['label'].values,
                                                      test_size=0.20,
                                                      random_state=RANDOM_SEED,
                                                      stratify=df['label'].values)
    return X_train, X_val, y_train, y_val


def set_data_category_in_df(df): 

    """Create function set_data_category_in_df to set data categary inside the reviews label data."""


    X_train, X_val, y_train, y_val = data_split(df)
    df['data_category'] = ['unset']*df.shape[0]
    df.loc[X_train, 'data_category'] = 'train'
    df.loc[X_val, 'data_category'] = 'val'
    return df

In [None]:
def encode_data_and_prepare_dataset(df):

    """
    Create function encode_data_and_prepare_dataset to 
    1. encode train data, validation data. 
    2. set input_ids, attention_masks and labels for train and validationd data.
    3. generate train and validation dataset.
    """


    df = set_data_category_in_df(df)
    encoded_data_train = tokenizer.batch_encode_plus(df[df.data_category=='train'].Vietnamese.values,
                                                     add_special_tokens=True,
                                                     return_attention_mask=True,
                                                     padding=True,
                                                     return_tensors='pt')

    encoded_data_val = tokenizer.batch_encode_plus(df[df.data_category=='val'].Vietnamese.values,
                                                   add_special_tokens=True,
                                                   return_attention_mask=True,
                                                   padding=True,
                                                   return_tensors='pt')

    input_ids_train = encoded_data_train['input_ids']
    attention_masks_train = encoded_data_train['attention_mask']
    labels_train = torch.tensor(df[df.data_category=='train']['label'].values) 

    input_ids_val = encoded_data_val['input_ids']
    attention_masks_val = encoded_data_val['attention_mask']
    labels_val = torch.tensor(df[df.data_category=='val']['label'].values) 

    dataset_train = TensorDataset(input_ids_train, attention_masks_train, labels_train)
    dataset_val = TensorDataset(input_ids_val, attention_masks_val, labels_val)
    return dataset_train,dataset_val 

In [None]:
def build_Bert_model(pre_trained_model,attention_probs_dropout_prob,hidden_dropout_prob):

    """Build BERT model"""


    model = BertForSequenceClassification.from_pretrained(pre_trained_model, 
                                                          num_labels=len(label_dict),
                                                          output_attentions=False,
                                                          output_hidden_states=False,
                                                          hidden_dropout_prob = hidden_dropout_prob,
                                                          attention_probs_dropout_prob = attention_probs_dropout_prob
                                                          )
    return model


def build_dataloader(df,batch_size):

    """Build train and validation dataloader."""


    dataset_train, dataset_val  =encode_data_and_prepare_dataset(df)
    dataloader_train = DataLoader(dataset_train, 
                                  sampler=RandomSampler(dataset_train), 
                                  batch_size=batch_size)
    dataloader_validation = DataLoader(dataset_val, 
                                      sampler=SequentialSampler(dataset_val), 
                                      batch_size=batch_size)
    return dataloader_train,dataloader_validation


def setup_optimizer(Ir,eps):

    """Setup optimizer."""


    optimizer = AdamW(model.parameters(),
                      lr=Ir, 
                      eps=eps)
    return optimizer


def setup_scheduler(optimizer,Ir,eps,epochs):

    """Setup scheduler."""


    scheduler = get_linear_schedule_with_warmup(optimizer, 
                                                num_warmup_steps=0,
                                                num_training_steps=len(dataloader_train)*epochs)
    return scheduler

In [None]:
def evaluate(dataloader_validation):

    """Create evaluate function to evaluate BERT model."""


    model.eval()
    
    loss_val_total = 0
    predictions, true_vals = [], []
    
    for batch in dataloader_validation:
        
        batch = tuple(b.to(device) for b in batch)
        
        inputs = {'input_ids':      batch[0],
                  'attention_mask': batch[1],
                  'labels':         batch[2],
                 }

        with torch.no_grad():        
            outputs = model(**inputs)
            
        loss = outputs[0]
        logits = outputs[1]
        loss_val_total += loss.item()

        logits = logits.detach().cpu().numpy()
        label_ids = inputs['labels'].cpu().numpy()
        predictions.append(logits)
        true_vals.append(label_ids)
    
    loss_val_avg = loss_val_total/len(dataloader_validation) 
    
    predictions = np.concatenate(predictions, axis=0)
    true_vals = np.concatenate(true_vals, axis=0)
            
    return loss_val_avg, predictions, true_vals

In [None]:
def train_model(Ir,eps,epochs,batch_size,dataloader_validation):

    """
    Create train_model function to Train BERT model.
    Output trained BERT model and eval_df dataset for trained BERT information including model metrics.
    """


    epoch_list=[]
    train_loss=[]
    validation_loss=[]
    F1_score_weighted=[]
    F1_score_macro=[]
    F1_score_micro=[]
    
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    print(device)

    optimizer = setup_optimizer(Ir,eps)
    
    scheduler = setup_scheduler(optimizer,Ir,eps,epochs)
    
    for epoch in tqdm(range(1, epochs+1)):
        
        epoch_list.append(epoch)
        model.train()
        
        loss_train_total = 0

        progress_bar = tqdm(dataloader_train, desc='Epoch {:1d}'.format(epoch), leave=False, disable=False)
    
        for batch in progress_bar:

            model.zero_grad()
            
            batch = tuple(b.to(device) for b in batch)

            inputs = {'input_ids':      batch[0],
                      'attention_mask': batch[1],
                      'labels':         batch[2],
                    }       

            outputs = model(**inputs)
          
            loss = outputs[0]
            loss_train_total += loss.item()
            loss.backward()

            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

            optimizer.step()
            scheduler.step()

            progress_bar.set_postfix({'training_loss': '{:.3f}'.format(loss.item()/len(batch))})
            
        torch.save(model.state_dict(), f'emotion_{model_type}_NLP_{epoch}.model')
            
        tqdm.write(f'\nEpoch {epoch}')
        
        loss_train_avg = loss_train_total/len(dataloader_train)            
        tqdm.write(f'Training loss: {loss_train_avg}')
        train_loss.append(loss_train_avg)

        val_loss, predictions, true_vals = evaluate(dataloader_validation) 
        val_f1_weighted = f1_score_func(predictions, true_vals,'weighted')
        val_f1_macro = f1_score_func(predictions, true_vals,'macro')
        val_f1_micro = f1_score_func(predictions, true_vals,'micro')
        validation_loss.append(val_loss)
        F1_score_weighted.append(val_f1_weighted)
        F1_score_macro.append(val_f1_macro)
        F1_score_micro.append(val_f1_micro)

        tqdm.write(f'Validation loss: {val_loss}')
        tqdm.write(f'F1 Score (Weighted): {val_f1_weighted}')
        tqdm.write(f'F1 Score (macro): {val_f1_macro}')
        tqdm.write(f'F1 Score (micro): {val_f1_micro}')

        # Create dataframe for BERT model evaluation metrics
        eval_df = pd.DataFrame()
        eval_df['emotion'] = ['positive, negative, neutral'] * len(epoch_list)
        eval_df['epoch'] = epoch_list
        eval_df['train_loss'] = train_loss
        eval_df['val_loss'] = validation_loss
        eval_df['F1_score_weighted'] = F1_score_weighted
        eval_df['F1_score_macro'] = F1_score_macro
        eval_df['F1_score_micro'] = F1_score_micro
        eval_df['batch_size'] = batch_size
        eval_df['Ir'] = Ir
        eval_df['eps'] = eps
        eval_df['pre_trained_model'] = pre_trained_model
        eval_df['hidden_dropout_prob'] = hidden_dropout_prob
        eval_df['attention_probs_dropout_prob'] = attention_probs_dropout_prob  
        eval_df['note'] = 'Added and splited Self-judged Review emotions into train and valiation by 8:2'
        eval_df['train_data_creator'] = train_data_provider
        eval_df['train_data_type'] = train_data_type 
    return eval_df, model

In [None]:
"""
Initiate eval_df_path_list, model_path_list, accuracy_per_class_path_list, 
best_epoch_F1_score_macro_list, and best_model_path_list
"""

eval_df_path_list = []
model_path_list = []
accuracy_per_class_path_list = []
best_epoch_F1_score_macro_list = []
best_model_path_list = []
train_data_type_list = []

In [None]:
def f1_score_func(preds, labels, average):

    """Create f1_score_func to calculate F1 score."""


    preds_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return f1_score(labels_flat, preds_flat, average=average)


def accuracy_per_class(preds, labels):    

    """
    Create accuracy_per_class function to generate a dataframe 
    for prediction accuracy of individual class
    """


    class_list = []
    score_list = []
    preds_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    
    print("Prediction accuracy for individual class:")
    for label in np.unique(labels_flat):
        y_preds = preds_flat[labels_flat==label]
        y_true = labels_flat[labels_flat==label]
        acc= len(y_preds[y_preds==label])/len(y_true)
        class_list.append(label_dict_inverse[label])
        score_list.append(len(y_preds[y_preds==label])/len(y_true))     
        print(f'Class: {label_dict_inverse[label]}')
        print(f'Accuracy: {len(y_preds[y_preds==label])}/{len(y_true)}, ',\
              '{:.3f}'.format(acc),'\n')
    
    # Create dataframe to hold information about the BERT model
    df = pd.DataFrame(class_list,columns=['class'])
    df['score'] = score_list
    df['pre_trained_model'] = model_type
    df['epoch'] = epochs
    df['train_data_creator'] = train_data_provider
    df['train_data_type'] = train_data_type
    return df


def predict_text(input_text):

    """Create predict_text function to predict text"""


    inputs = tokenizer(input_text.lower(), return_tensors="pt").to(device)
    with torch.no_grad():
        logits = model(**inputs).logits
    predicted_class_id = logits.argmax().item()
    return label_dict_inverse[predicted_class_id]


def predict_emotion_test():

    """"Create predict_emotion_test function to test trained BERT model prediction performance."""


    test_text_product = 'Sản phẩm rất tốt, và mạnh mẽ'    # Very good product, and powerful
    test_text_service = 'Tôi cần ai đó hỗ trợ tôi cách sử dụng' # I need someone to help me how to use it
    test_text_logistic = 'giao hàng quá chậm' # delivery is too slow
    print(" ")
    print("Predict emotion for 'Sản phẩm rất tốt, và mạnh mẽ': ", predict_text(test_text_product))
    print("Predict emotion for 'Tôi cần ai đó hỗ trợ tôi cách sử dụng': ", predict_text(test_text_service))
    print("Predict emotion for 'giao hàng quá chậm': ", predict_text(test_text_logistic))


def prepare_review_data():

    """
    Clean and prepare reviews label dataset. 
    Output reviews label datasets before and after removing null value.
    """


    review_df = pd.read_excel(f'drive/MyDrive/Realtime Dreamer/reviews.xlsx',
                              engine='openpyxl',
                              sheet_name ='LZD R&R raw')
    print("\nraw review dataset:",review_df.shape)

    review_df = review_df.reset_index()
    review_df_1 = review_df[['index','Review Content','Rating']].dropna(how='any')
    print("raw review dataset after dropping null value:", review_df_1.shape)
    return review_df,review_df_1


def generate_predition_data():

    """Create generate_predition_data function to predict customer reviews emotion."""


    review_df,review_df1 = prepare_review_data()
    review_df1['emotion'] = review_df1['Review Content'].apply(predict_text)
    review_emotion_prediction = pd.merge(review_df,review_df1[['index','emotion']], how='left', on='index')
    review_emotion_prediction_path = f'/content/drive/MyDrive/Realtime Dreamer/review_emotion_prediction.csv'
    review_emotion_prediction.to_csv(review_emotion_prediction_path, index=False)
    return review_emotion_prediction


def prepare_model_df(eval_df_path_list, accuracy_per_class_path_list, model_path_list):

    """
    Create prepare_model_df function to generate model_df dataframe to 
    hold the list of paths for eval_df, accuracy_per_class, trained model, 
    and best epoch for the highest F1 macro score of the trained BERT model.
    """


    model_df = pd.DataFrame()
    model_df['eval_df_path'] = eval_df_path_list
    model_df['accuracy_per_class_path'] = accuracy_per_class_path_list
    model_df['model_path'] = model_path_list
    model_df['best_epoch_F1_score_macro'] = best_epoch_F1_score_macro_list
    model_df['train_data_type'] = train_data_type_list
    model_df.to_csv(f'/content/drive/MyDrive/Realtime Dreamer/model_info.csv', index=False)

    return model_df


def draw_F1_scores(df, domain, width, height, first_line_title, second_line_title,epochs):

    """
    Create draw_F1_scores function to plot graph for F1 scores (macro, micro and weighted), train and validation losses.
    """


    if epochs == 1:
        base = alt.Chart(df).mark_point()
    else:
        base = alt.Chart(df).mark_line()

    graph=base.encode(
        x='epoch',
        y=alt.Y("value:Q", title="", scale=alt.Scale(domain=domain)),
        color = alt.Color('line type:N',
                        legend=alt.Legend(
                        title="Metrics",
                        orient='right',
                        titleFontSize=11,
                        titleColor='black',
                        labelFontSize=10.5,
                        labelColor='black',
                        direction='vertical')),
        tooltip = ['emotion', 'epoch', 'line type', 'value']
        ).interactive(
        ).properties(
            width=width,
            height=height,
            title={"text": [first_line_title, second_line_title], "color": "black"})
    return graph


def draw_prediction_accuracy(df, domain, width, height, first_line_title, second_line_title):

    """
    Create draw_prediction_accuracy function to plot graph 
    for prediction accuracy of individual class by different trained BERT models
    """


    base = alt.Chart(df).mark_bar().encode(
        x = alt.X('class:N', axis=alt.Axis(labelAngle=360))
        ).properties(
            width=width,
            height=height,
            title={"text": [first_line_title,second_line_title], "color": "black"})

    graph = base.mark_bar(size=20).encode(
        y = alt.Y("score:Q", title="", scale=alt.Scale(domain=domain)),
        color = alt.Color('class:N', legend=None),
        tooltip = ['pre_trained_model', 'class', 'score', 'epoch', 'train_data_creator']
        ).interactive()
    return graph

# Set df_f1_scores_id_vars, df_f1_scores_value_vars and df_f1_scores_var_name for arguments in pd.melt function. 
df_f1_scores_id_vars = ['emotion', 'epoch', 'batch_size', 'Ir', 'eps', 'pre_trained_model', 'hidden_dropout_prob', 
                        'attention_probs_dropout_prob', 'note', 'train_data_creator']
df_f1_scores_value_vars = ['train_loss', 'val_loss', 'F1_score_weighted', 'F1_score_macro', 'F1_score_micro']   
df_f1_scores_var_name = ['line type']  


def prepare_data_for_metrics_graphs(df, best_model=False):

    """
    Create prepare_data_for_metrics_graphs function to output 
    df_f1_scores_long dataframe with list of F1 scores(macro, micro and weighted) 
    for either 1 to the best epoch of the highest F1 macro score of the trained BERT model 
    or 1 to 10 epoches of the trained BERT model
    """


    if best_model:
        df_f1_scores = df[df['epoch']<=best_epoch_F1_score_macro]
    else:
        df_f1_scores = df.copy()
    df_f1_scores_long = pd.melt(df_f1_scores, 
                                id_vars=df_f1_scores_id_vars, 
                                value_vars=df_f1_scores_value_vars,
                                var_name=df_f1_scores_var_name)
    return df_f1_scores_long


def prepare_metrics_graphs(df_f1_scores_long, df_class_accuracy,domain,width,height,epochs):

    """
    Create prepare_metrics_graphs function to output two graphs:
    1.  F1_scores_graph: plot F1 scores(macro, micro and weighted), train and validation losses for different trained BERT models
    2.  prediction_accuracy_graph: plot prediction accuracy of individual class for different trained BERT models
    """


    F1_scores_graph = draw_F1_scores(df_f1_scores_long,
                                     domain,
                                     width,
                                     height,  
                                     "F1 scores (Macro, Micro, Weighted)",                            
                                     "Train loss, Validation loss",
                                     epochs) 
    
    prediction_accuracy_graph = draw_prediction_accuracy(df_class_accuracy,
                                                         domain,
                                                         width,
                                                         height,                                               
                                                         "Accuracy Per Class",
                                                         "Pre-Trained Model: " \
                                                         + df_class_accuracy['pre_trained_model'][1])
    return F1_scores_graph, prediction_accuracy_graph

In [None]:
# pre_trained_model: 'trituenhantaoio/bert-base-vietnamese-uncased'

In [None]:
# Create reviews label dateset
df = set_data_category_in_df(train_review_df) 
print("\ndf['label'].value_counts():")
print(df['label'].value_counts())

# Set BERT model parameters and BERT model evaluation dataframe value.
hidden_dropout_prob = 0.1
attention_probs_dropout_prob = 0.1
pre_trained_model = 'trituenhantaoio/bert-base-vietnamese-uncased'
model_type = pre_trained_model.split('/')[0]
batch_size = 16
epochs = 10
Ir = 1e-5
eps = 1e-8
train_data_provider = 'Yunhong He'
train_data_type = 'Yunhong Keyword + oversample'
train_data_type_list.append(train_data_type)

# Create tokenizer
tokenizer = BertTokenizer.from_pretrained(pre_trained_model, do_lower_case=True)

# Build train and validation dataloader
dataloader_train, dataloader_validation = build_dataloader(df, batch_size)

# Build and train BERT model. Generate BERT model and dataset eval_df for model evaluation metrics
model = build_Bert_model(pre_trained_model, attention_probs_dropout_prob, hidden_dropout_prob)
eval_df, model = train_model(Ir, eps, epochs, batch_size, dataloader_validation)

# Save eval_df for building visualization later on
eval_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_NLP_Epoch{epochs}_train_data_provided_by_{train_data_provider}_eval_df.csv'
eval_df.to_csv(eval_df_path, index=False)

# Use dataloader_validation to generate predictions and true_vals. 
# Create dataframe accuracy_per_class_df to store BERT model prediction accuracy of individual class.
_, predictions, true_vals = evaluate(dataloader_validation)
accuracy_per_class_df = accuracy_per_class(predictions, true_vals)
accuracy_per_class_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_accuracy_per_class_df.csv'
accuracy_per_class_df.to_csv(accuracy_per_class_df_path,index=False)

# Save trained BERT model.
model_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}.model'
torch.save(model.state_dict(),model_path )

# Calculate best epoch for the highest F1 macro score of the trained BERT model.
best_epoch_F1_score_macro = eval_df[ eval_df['F1_score_macro']==max(eval_df['F1_score_macro'])]['epoch'].values[0]
print('best epoch for the best F1 score (macro): ',best_epoch_F1_score_macro)

# Append eval_df path to eval_df_path_list, accuracy_per_class_df path to accuracy_per_class_path_list 
# trained BERT model path to model_path_list, and best epoch for the highest F1 macro socre to best_epoch_F1_score_macro_list.
eval_df_path_list.append(eval_df_path)
accuracy_per_class_path_list.append(accuracy_per_class_df_path)
model_path_list.append(model_path)
best_epoch_F1_score_macro_list.append(best_epoch_F1_score_macro)


df['label'].value_counts():
2    5250
0    5250
1     269
Name: label, dtype: int64


Downloading tokenizer_config.json:   0%|          | 0.00/238 [00:00<?, ?B/s]

Downloading config.json:   0%|          | 0.00/846 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/422M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at trituenhantaoio/bert-base-vietnamese-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


cuda


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/539 [00:00<?, ?it/s]


Epoch 1
Training loss: 0.12605040880550367
Validation loss: 0.08155199555956104
F1 Score (Weighted): 0.9883929420297631
F1 Score (macro): 0.9891079320371926
F1 Score (micro): 0.9883936861652739


Epoch 2:   0%|          | 0/539 [00:00<?, ?it/s]


Epoch 2
Training loss: 0.025360395325443246
Validation loss: 0.04013586641780825
F1 Score (Weighted): 0.9921052309735612
F1 Score (macro): 0.9916464305720946
F1 Score (micro): 0.9921077065923862


Epoch 3:   0%|          | 0/539 [00:00<?, ?it/s]


Epoch 3
Training loss: 0.012088829498828332
Validation loss: 0.03541097691761226
F1 Score (Weighted): 0.9948910635568619
F1 Score (macro): 0.9935514094242944
F1 Score (micro): 0.9948932219127206


Epoch 4:   0%|          | 0/539 [00:00<?, ?it/s]


Epoch 4
Training loss: 0.00860917299711802
Validation loss: 0.03973696438921184
F1 Score (Weighted): 0.9925714469969845
F1 Score (macro): 0.9919652335290637
F1 Score (micro): 0.9925719591457753


Epoch 5:   0%|          | 0/539 [00:00<?, ?it/s]


Epoch 5
Training loss: 0.004558294640905514
Validation loss: 0.040779523517320976
F1 Score (Weighted): 0.9948956679896737
F1 Score (macro): 0.987807118988273
F1 Score (micro): 0.9948932219127206


Epoch 6:   0%|          | 0/539 [00:00<?, ?it/s]


Epoch 6
Training loss: 0.0033390524433127688
Validation loss: 0.03301420939668013
F1 Score (Weighted): 0.9958201382264406
F1 Score (macro): 0.99128639152232
F1 Score (micro): 0.9958217270194986


Epoch 7:   0%|          | 0/539 [00:00<?, ?it/s]


Epoch 7
Training loss: 0.0023459733783088494
Validation loss: 0.040486612738432425
F1 Score (Weighted): 0.9948911469499314
F1 Score (macro): 0.9906511384399073
F1 Score (micro): 0.9948932219127206


Epoch 8:   0%|          | 0/539 [00:00<?, ?it/s]


Epoch 8
Training loss: 0.001588548431856286
Validation loss: 0.03136443080514221
F1 Score (Weighted): 0.9962838875077662
F1 Score (macro): 0.9945038357069129
F1 Score (micro): 0.9962859795728877


Epoch 9:   0%|          | 0/539 [00:00<?, ?it/s]


Epoch 9
Training loss: 0.0011329849673457123
Validation loss: 0.031865718687792345
F1 Score (Weighted): 0.9962838875077662
F1 Score (macro): 0.9945038357069129
F1 Score (micro): 0.9962859795728877


Epoch 10:   0%|          | 0/539 [00:00<?, ?it/s]


Epoch 10
Training loss: 0.001235533796750229
Validation loss: 0.03444850952927717
F1 Score (Weighted): 0.9962838875077662
F1 Score (macro): 0.9945038357069129
F1 Score (micro): 0.9962859795728877
Prediction accuracy for individual class:
Class: negative
Accuracy: 1050/1050,  1.000 

Class: neutral
Accuracy: 53/54,  0.981 

Class: positive
Accuracy: 1043/1050,  0.993 

best epoch for the best F1 score (macro):  8


In [None]:
# Create dataframe model_df with trained BERT model performance metrics.
model_df = prepare_model_df(eval_df_path_list, accuracy_per_class_path_list, model_path_list)

# Set width, height, domain for BERT model evaluation graph.
width, height, domain = 200, 200, (0,1)

# Generate dataframe df_f1_scores_long which is a long form of dataset for BERT model F1 scores(macro, micro and weighted), train and validation losses.
df_f1_scores_long = prepare_data_for_metrics_graphs(eval_df, best_model=False)

# Predict emotions for sample customer reviews.
predict_emotion_test()

"""
Generate two graphs for BERT model evaluation metrics. 
1. F1_scores_graph: Trained BERT model F1 scores(macro, micro and weighted), train and validation losses. 
2. prediction_accuracy_graph: Trained BERT model prediction accuracy of individual classes.
"""


F1_scores_graph, prediction_accuracy_graph = prepare_metrics_graphs(df_f1_scores_long, 
                                                                    accuracy_per_class_df,
                                                                    domain, 
                                                                    width, 
                                                                    height, 
                                                                    epochs)
print("")   
F1_scores_graph

 
Predict emotion for 'Sản phẩm rất tốt, và mạnh mẽ':  positive
Predict emotion for 'Tôi cần ai đó hỗ trợ tôi cách sử dụng':  negative
Predict emotion for 'giao hàng quá chậm':  negative



In [None]:
prediction_accuracy_graph

In [None]:
# 'trituenhantaoio/bert-base-vietnamese-uncased' Bert Model with best F1 score macro

In [None]:
# Build BERT model.
model = build_Bert_model(pre_trained_model,attention_probs_dropout_prob,hidden_dropout_prob)

# Let PyTorch to use the currently available device to load a tensor into memory.
model.to(device)

# Find the path as best_model_path for the BERT model trained at the best epoch at which the model has highest F1 macro score.
epoch = best_epoch_F1_score_macro
best_model_path = f'emotion_{model_type}_NLP_{epoch}.model'

# Load the best trained BERT model.
model.load_state_dict(torch.load(best_model_path, map_location=torch.device('cpu')))

# Save the best trained BERT model.
epochs = best_epoch_F1_score_macro
model_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}.model'
torch.save(model.state_dict(),model_path )

# Append the path of best trained BERT model to best_model_path_list. 
best_model_path_list.append(model_path)

# Add a column as best_model_path in model_df to store best_model_path_list.
model_df['best_model_path'] = best_model_path_list

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at trituenhantaoio/bert-base-vietnamese-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Predict the emotion of the sample of customer reviews.
predict_emotion_test()

# Generate dataframe df_f1_scores_long as long form of dataset for the Best Trained BERT model performance metrics.
df_f1_scores_long = prepare_data_for_metrics_graphs(eval_df, best_model=True)

# Use dataloader_validation to generate predictions and true_vals. 
# Create dataframe accuracy_per_class_df to store BERT model prediction accuracy of individual class.
_, predictions, true_vals = evaluate(dataloader_validation)
accuracy_per_class_df = accuracy_per_class(predictions, true_vals)

# Save accuracy_per_class_df.
accuracy_per_class_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_accuracy_per_class_df.csv'
accuracy_per_class_df.to_csv(accuracy_per_class_df_path,index=False)

"""
Generate two graphs for BERT model evaluation metrics. 
1. F1_scores_graph: Trained BERT model F1 scores(macro, micro and weighted), train and validation losses. 
2. prediction_accuracy_graph: Trained BERT model prediction accuracy of individual classes.
"""

F1_scores_graph, prediction_accuracy_graph = prepare_metrics_graphs(df_f1_scores_long, 
                                                                    accuracy_per_class_df,
                                                                    domain, 
                                                                    width, 
                                                                    height, 
                                                                    epochs)
print("")   
F1_scores_graph

 
Predict emotion for 'Sản phẩm rất tốt, và mạnh mẽ':  positive
Predict emotion for 'Tôi cần ai đó hỗ trợ tôi cách sử dụng':  negative
Predict emotion for 'giao hàng quá chậm':  negative
Prediction accuracy for individual class:
Class: negative
Accuracy: 1050/1050,  1.000 

Class: neutral
Accuracy: 53/54,  0.981 

Class: positive
Accuracy: 1043/1050,  0.993 




In [None]:
prediction_accuracy_graph

In [None]:

# pre_trained_model: 'NlpHUST/vibert4news-base-cased'

In [None]:
# Create reviews label dateset
df = set_data_category_in_df(train_review_df) 
print("\ndf['label'].value_counts():")
print(df['label'].value_counts())

# Set BERT model parameters and BERT model evaluation dataframe value.
hidden_dropout_prob = 0.1
attention_probs_dropout_prob = 0.1
pre_trained_model = 'NlpHUST/vibert4news-base-cased'
model_type = pre_trained_model.split('/')[0]
batch_size = 16
epochs = 10
Ir = 1e-5
eps = 1e-8
train_data_provider = 'Yunhong He'
train_data_type = 'Yunhong Keyword + oversample'
train_data_type_list.append(train_data_type)

# Create tokenizer
tokenizer = BertTokenizer.from_pretrained(pre_trained_model, do_lower_case=True)

# Build train and validation dataloader
dataloader_train, dataloader_validation = build_dataloader(df, batch_size)

# Build and train BERT model. Generate BERT model and dataset eval_df for model evaluation metrics
model = build_Bert_model(pre_trained_model, attention_probs_dropout_prob, hidden_dropout_prob)
eval_df, model = train_model(Ir, eps, epochs, batch_size, dataloader_validation)

# Save eval_df for building visualization later on
eval_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_NLP_Epoch{epochs}_train_data_provided_by_{train_data_provider}_eval_df.csv'
eval_df.to_csv(eval_df_path, index=False)

# Use dataloader_validation to generate predictions and true_vals. 
# Create dataframe accuracy_per_class_df to store BERT model prediction accuracy of individual class.
_, predictions, true_vals = evaluate(dataloader_validation)
accuracy_per_class_df = accuracy_per_class(predictions, true_vals)
accuracy_per_class_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_accuracy_per_class_df.csv'
accuracy_per_class_df.to_csv(accuracy_per_class_df_path,index=False)

# Save trained BERT model.
model_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}.model'
torch.save(model.state_dict(),model_path )

# Calculate best epoch for the highest F1 macro score of the trained BERT model.
best_epoch_F1_score_macro = eval_df[ eval_df['F1_score_macro']==max(eval_df['F1_score_macro'])]['epoch'].values[0]
print('best epoch for the best F1 score (macro): ',best_epoch_F1_score_macro)

# Append eval_df path to eval_df_path_list, accuracy_per_class_df path to accuracy_per_class_path_list 
# trained BERT model path to model_path_list, and best epoch for the highest F1 macro socre to best_epoch_F1_score_macro_list.
eval_df_path_list.append(eval_df_path)
accuracy_per_class_path_list.append(accuracy_per_class_df_path)
model_path_list.append(model_path)
best_epoch_F1_score_macro_list.append(best_epoch_F1_score_macro)


df['label'].value_counts():
2    5250
0    5250
1     269
Name: label, dtype: int64


Downloading vocab.txt:   0%|          | 0.00/402k [00:00<?, ?B/s]

Downloading tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading config.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'RobertaTokenizer'. 
The class this function is called from is 'BertTokenizer'.


Downloading pytorch_model.bin:   0%|          | 0.00/512M [00:00<?, ?B/s]

Some weights of the model checkpoint at NlpHUST/vibert4news-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

cuda


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/539 [00:00<?, ?it/s]


Epoch 1
Training loss: 0.197796515755421
Validation loss: 0.0597225987645625
F1 Score (Weighted): 0.9879257645817046
F1 Score (macro): 0.9887884716489012
F1 Score (micro): 0.9879294336118849


Epoch 2:   0%|          | 0/539 [00:00<?, ?it/s]


Epoch 2
Training loss: 0.04925591151376042
Validation loss: 0.032746181412410265
F1 Score (Weighted): 0.9944267773314571
F1 Score (macro): 0.9932339260815891
F1 Score (micro): 0.9944289693593314


Epoch 3:   0%|          | 0/539 [00:00<?, ?it/s]


Epoch 3
Training loss: 0.02524122384722182
Validation loss: 0.04010445558567342
F1 Score (Weighted): 0.9939624842660493
F1 Score (macro): 0.9929164380616244
F1 Score (micro): 0.9939647168059424


Epoch 4:   0%|          | 0/539 [00:00<?, ?it/s]


Epoch 4
Training loss: 0.014091063304171432
Validation loss: 0.038875270062190895
F1 Score (Weighted): 0.9930367384402358
F1 Score (macro): 0.9865359652582764
F1 Score (micro): 0.9930362116991643


Epoch 5:   0%|          | 0/539 [00:00<?, ?it/s]


Epoch 5
Training loss: 0.01352475747713227
Validation loss: 0.06597012696201326
F1 Score (Weighted): 0.9907121882362431
F1 Score (macro): 0.9906938546812428
F1 Score (micro): 0.9907149489322191


Epoch 6:   0%|          | 0/539 [00:00<?, ?it/s]


Epoch 6
Training loss: 0.0052809914923408014
Validation loss: 0.04296713050347602
F1 Score (Weighted): 0.9944267773314571
F1 Score (macro): 0.9932339260815891
F1 Score (micro): 0.9944289693593314


Epoch 7:   0%|          | 0/539 [00:00<?, ?it/s]


Epoch 7
Training loss: 0.004065915845309799
Validation loss: 0.03823500360624705
F1 Score (Weighted): 0.9953553435737479
F1 Score (macro): 0.9938688885215555
F1 Score (micro): 0.9953574744661096


Epoch 8:   0%|          | 0/539 [00:00<?, ?it/s]


Epoch 8
Training loss: 0.003344047002575653
Validation loss: 0.04229389392559016
F1 Score (Weighted): 0.9944267773314571
F1 Score (macro): 0.9932339260815891
F1 Score (micro): 0.9944289693593314


Epoch 9:   0%|          | 0/539 [00:00<?, ?it/s]


Epoch 9
Training loss: 0.0024111013330039392
Validation loss: 0.03853613958230266
F1 Score (Weighted): 0.9958196180135723
F1 Score (macro): 0.9941863638051687
F1 Score (micro): 0.9958217270194986


Epoch 10:   0%|          | 0/539 [00:00<?, ?it/s]


Epoch 10
Training loss: 0.001427926390246831
Validation loss: 0.03462622753476252
F1 Score (Weighted): 0.9962838875077662
F1 Score (macro): 0.9945038357069129
F1 Score (micro): 0.9962859795728877
Prediction accuracy for individual class:
Class: negative
Accuracy: 1050/1050,  1.000 

Class: neutral
Accuracy: 53/54,  0.981 

Class: positive
Accuracy: 1043/1050,  0.993 

best epoch for the best F1 score (macro):  10


In [None]:
# Create dataframe model_df with trained BERT model performance metrics.
model_df = prepare_model_df(eval_df_path_list, accuracy_per_class_path_list, model_path_list)

# Set width, height, domain for BERT model evaluation graph.
width, height, domain = 200, 200, (0,1)

# Generate dataframe df_f1_scores_long which is a long form of dataset for BERT model F1 scores(macro, micro and weighted), train and validation losses.
df_f1_scores_long = prepare_data_for_metrics_graphs(eval_df, best_model=False)

# Predict emotions for sample customer reviews.
predict_emotion_test()

"""
Generate two graphs for BERT model evaluation metrics. 
1. F1_scores_graph: Trained BERT model F1 scores(macro, micro and weighted), train and validation losses. 
2. prediction_accuracy_graph: Trained BERT model prediction accuracy of individual classes.
"""


F1_scores_graph, prediction_accuracy_graph = prepare_metrics_graphs(df_f1_scores_long, 
                                                                    accuracy_per_class_df,
                                                                    domain, 
                                                                    width, 
                                                                    height, 
                                                                    epochs)
print("")   
F1_scores_graph

 
Predict emotion for 'Sản phẩm rất tốt, và mạnh mẽ':  positive
Predict emotion for 'Tôi cần ai đó hỗ trợ tôi cách sử dụng':  positive
Predict emotion for 'giao hàng quá chậm':  negative



In [None]:
prediction_accuracy_graph

In [None]:
# 'NlpHUST/vibert4news-base-cased' Bert Model with best F1 score macro

In [None]:
# Build BERT model.
model = build_Bert_model(pre_trained_model,attention_probs_dropout_prob,hidden_dropout_prob)

# Let PyTorch to use the currently available device to load a tensor into memory.
model.to(device)

# Find the path as best_model_path for the BERT model trained at the best epoch at which the model has highest F1 macro score.
epoch = best_epoch_F1_score_macro
best_model_path = f'emotion_{model_type}_NLP_{epoch}.model'

# Load the best trained BERT model.
model.load_state_dict(torch.load(best_model_path, map_location=torch.device('cpu')))

# Save the best trained BERT model.
epochs = best_epoch_F1_score_macro
model_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}.model'
torch.save(model.state_dict(),model_path )

# Append the path of best trained BERT model to best_model_path_list. 
best_model_path_list.append(model_path)

# Add a column as best_model_path in model_df to store best_model_path_list.
model_df['best_model_path'] = best_model_path_list

Some weights of the model checkpoint at NlpHUST/vibert4news-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

In [None]:
# Predict the emotion of the sample of customer reviews.
predict_emotion_test()

# Generate dataframe df_f1_scores_long as long form of dataset for the Best Trained BERT model performance metrics.
df_f1_scores_long = prepare_data_for_metrics_graphs(eval_df, best_model=True)

# Use dataloader_validation to generate predictions and true_vals. 
# Create dataframe accuracy_per_class_df to store BERT model prediction accuracy of individual class.
_, predictions, true_vals = evaluate(dataloader_validation)
accuracy_per_class_df = accuracy_per_class(predictions, true_vals)

# Save accuracy_per_class_df.
accuracy_per_class_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_accuracy_per_class_df.csv'
accuracy_per_class_df.to_csv(accuracy_per_class_df_path,index=False)

"""
Generate two graphs for BERT model evaluation metrics. 
1. F1_scores_graph: Trained BERT model F1 scores(macro, micro and weighted), train and validation losses. 
2. prediction_accuracy_graph: Trained BERT model prediction accuracy of individual classes.
"""

F1_scores_graph, prediction_accuracy_graph = prepare_metrics_graphs(df_f1_scores_long, 
                                                                    accuracy_per_class_df,
                                                                    domain, 
                                                                    width, 
                                                                    height, 
                                                                    epochs)
print("")   
F1_scores_graph

 
Predict emotion for 'Sản phẩm rất tốt, và mạnh mẽ':  positive
Predict emotion for 'Tôi cần ai đó hỗ trợ tôi cách sử dụng':  positive
Predict emotion for 'giao hàng quá chậm':  negative
Prediction accuracy for individual class:
Class: negative
Accuracy: 1050/1050,  1.000 

Class: neutral
Accuracy: 53/54,  0.981 

Class: positive
Accuracy: 1043/1050,  0.993 




In [None]:
prediction_accuracy_graph

In [None]:
# pre_trained_model: 'bert-base-uncased'

In [None]:
# Create reviews label dateset
df = set_data_category_in_df(train_review_df) 
print("\ndf['label'].value_counts():")
print(df['label'].value_counts())

# Set BERT model parameters and BERT model evaluation dataframe value.
hidden_dropout_prob = 0.1
attention_probs_dropout_prob = 0.1
pre_trained_model = 'bert-base-uncased'
model_type = pre_trained_model.split('/')[0]
batch_size = 16
epochs = 10
Ir = 1e-5
eps = 1e-8
train_data_provider = 'Yunhong He'
train_data_type = 'Yunhong Keyword + oversample'
train_data_type_list.append(train_data_type)

# Create tokenizer
tokenizer = BertTokenizer.from_pretrained(pre_trained_model, do_lower_case=True)

# Build train and validation dataloader
dataloader_train, dataloader_validation = build_dataloader(df, batch_size)

# Build and train BERT model. Generate BERT model and dataset eval_df for model evaluation metrics
model = build_Bert_model(pre_trained_model, attention_probs_dropout_prob, hidden_dropout_prob)
eval_df, model = train_model(Ir, eps, epochs, batch_size, dataloader_validation)

# Save eval_df for building visualization later on
eval_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_NLP_Epoch{epochs}_train_data_provided_by_{train_data_provider}_eval_df.csv'
eval_df.to_csv(eval_df_path, index=False)

# Use dataloader_validation to generate predictions and true_vals. 
# Create dataframe accuracy_per_class_df to store BERT model prediction accuracy of individual class.
_, predictions, true_vals = evaluate(dataloader_validation)
accuracy_per_class_df = accuracy_per_class(predictions, true_vals)
accuracy_per_class_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_accuracy_per_class_df.csv'
accuracy_per_class_df.to_csv(accuracy_per_class_df_path,index=False)

# Save trained BERT model.
model_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}.model'
torch.save(model.state_dict(),model_path )

# Calculate best epoch for the highest F1 macro score of the trained BERT model.
best_epoch_F1_score_macro = eval_df[ eval_df['F1_score_macro']==max(eval_df['F1_score_macro'])]['epoch'].values[0]
print('best epoch for the best F1 score (macro): ',best_epoch_F1_score_macro)

# Append eval_df path to eval_df_path_list, accuracy_per_class_df path to accuracy_per_class_path_list 
# trained BERT model path to model_path_list, and best epoch for the highest F1 macro socre to best_epoch_F1_score_macro_list.
eval_df_path_list.append(eval_df_path)
accuracy_per_class_path_list.append(accuracy_per_class_df_path)
model_path_list.append(model_path)
best_epoch_F1_score_macro_list.append(best_epoch_F1_score_macro)


df['label'].value_counts():
2    5250
0    5250
1     269
Name: label, dtype: int64


Downloading vocab.txt:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/420M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

cuda


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/539 [00:00<?, ?it/s]


Epoch 1
Training loss: 0.26092971726977277
Validation loss: 0.0764957116658075
F1 Score (Weighted): 0.9828157560665213
F1 Score (macro): 0.9852941991594709
F1 Score (micro): 0.9828226555246053


Epoch 2:   0%|          | 0/539 [00:00<?, ?it/s]


Epoch 2
Training loss: 0.0454759348282359
Validation loss: 0.05243690231797734
F1 Score (Weighted): 0.9902481983414595
F1 Score (macro): 0.9903765739722384
F1 Score (micro): 0.9902506963788301


Epoch 3:   0%|          | 0/539 [00:00<?, ?it/s]


Epoch 3
Training loss: 0.028443453259321606
Validation loss: 0.06120397935425259
F1 Score (Weighted): 0.9897834346648439
F1 Score (macro): 0.9900587641438477
F1 Score (micro): 0.989786443825441


Epoch 4:   0%|          | 0/539 [00:00<?, ?it/s]


Epoch 4
Training loss: 0.016814924894751296
Validation loss: 0.0565817413097075
F1 Score (Weighted): 0.9916408942346958
F1 Score (macro): 0.9913289126878038
F1 Score (micro): 0.9916434540389972


Epoch 5:   0%|          | 0/539 [00:00<?, ?it/s]


Epoch 5
Training loss: 0.017798043935590358
Validation loss: 0.0309962924747163
F1 Score (Weighted): 0.9958196180135723
F1 Score (macro): 0.9941863638051687
F1 Score (micro): 0.9958217270194986


Epoch 6:   0%|          | 0/539 [00:00<?, ?it/s]


Epoch 6
Training loss: 0.010755819165581321
Validation loss: 0.05565914976339425
F1 Score (Weighted): 0.9921052309735612
F1 Score (macro): 0.9916464305720946
F1 Score (micro): 0.9921077065923862


Epoch 7:   0%|          | 0/539 [00:00<?, ?it/s]


Epoch 7
Training loss: 0.008435786205677814
Validation loss: 0.05053920632867462
F1 Score (Weighted): 0.9934981837291229
F1 Score (macro): 0.9925989449325643
F1 Score (micro): 0.9935004642525533


Epoch 8:   0%|          | 0/539 [00:00<?, ?it/s]


Epoch 8
Training loss: 0.005722714966139389
Validation loss: 0.05178095389653915
F1 Score (Weighted): 0.9934981837291229
F1 Score (macro): 0.9925989449325643
F1 Score (micro): 0.9935004642525533


Epoch 9:   0%|          | 0/539 [00:00<?, ?it/s]


Epoch 9
Training loss: 0.004014352846072433
Validation loss: 0.04856542642058533
F1 Score (Weighted): 0.9939624842660493
F1 Score (macro): 0.9929164380616244
F1 Score (micro): 0.9939647168059424


Epoch 10:   0%|          | 0/539 [00:00<?, ?it/s]


Epoch 10
Training loss: 0.0024708018913897054
Validation loss: 0.04759633575886255
F1 Score (Weighted): 0.9939624842660493
F1 Score (macro): 0.9929164380616244
F1 Score (micro): 0.9939647168059424
Prediction accuracy for individual class:
Class: negative
Accuracy: 1050/1050,  1.000 

Class: neutral
Accuracy: 53/54,  0.981 

Class: positive
Accuracy: 1038/1050,  0.989 

best epoch for the best F1 score (macro):  5


In [None]:
# Create dataframe model_df with trained BERT model performance metrics.
model_df = prepare_model_df(eval_df_path_list, accuracy_per_class_path_list, model_path_list)

# Set width, height, domain for BERT model evaluation graph.
width, height, domain = 200, 200, (0,1)

# Generate dataframe df_f1_scores_long which is a long form of dataset for BERT model F1 scores(macro, micro and weighted), train and validation losses.
df_f1_scores_long = prepare_data_for_metrics_graphs(eval_df, best_model=False)

# Predict emotions for sample customer reviews.
predict_emotion_test()

"""
Generate two graphs for BERT model evaluation metrics. 
1. F1_scores_graph: Trained BERT model F1 scores(macro, micro and weighted), train and validation losses. 
2. prediction_accuracy_graph: Trained BERT model prediction accuracy of individual classes.
"""


F1_scores_graph, prediction_accuracy_graph = prepare_metrics_graphs(df_f1_scores_long, 
                                                                    accuracy_per_class_df,
                                                                    domain, 
                                                                    width, 
                                                                    height, 
                                                                    epochs)
print("")   
F1_scores_graph

 
Predict emotion for 'Sản phẩm rất tốt, và mạnh mẽ':  positive
Predict emotion for 'Tôi cần ai đó hỗ trợ tôi cách sử dụng':  positive
Predict emotion for 'giao hàng quá chậm':  negative



In [None]:
prediction_accuracy_graph

In [None]:
# 'bert-base-uncased' Bert Model with best F1 score macro

In [None]:
# Build BERT model.
model = build_Bert_model(pre_trained_model,attention_probs_dropout_prob,hidden_dropout_prob)

# Let PyTorch to use the currently available device to load a tensor into memory.
model.to(device)

# Find the path as best_model_path for the BERT model trained at the best epoch at which the model has highest F1 macro score.
epoch = best_epoch_F1_score_macro
best_model_path = f'emotion_{model_type}_NLP_{epoch}.model'

# Load the best trained BERT model.
model.load_state_dict(torch.load(best_model_path, map_location=torch.device('cpu')))

# Save the best trained BERT model.
epochs = best_epoch_F1_score_macro
model_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}.model'
torch.save(model.state_dict(),model_path )

# Append the path of best trained BERT model to best_model_path_list. 
best_model_path_list.append(model_path)

# Add a column as best_model_path in model_df to store best_model_path_list.
model_df['best_model_path'] = best_model_path_list

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [None]:
# Predict the emotion of the sample of customer reviews.
predict_emotion_test()

# Generate dataframe df_f1_scores_long as long form of dataset for the Best Trained BERT model performance metrics.
df_f1_scores_long = prepare_data_for_metrics_graphs(eval_df, best_model=True)

# Use dataloader_validation to generate predictions and true_vals. 
# Create dataframe accuracy_per_class_df to store BERT model prediction accuracy of individual class.
_, predictions, true_vals = evaluate(dataloader_validation)
accuracy_per_class_df = accuracy_per_class(predictions, true_vals)

# Save accuracy_per_class_df.
accuracy_per_class_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_accuracy_per_class_df.csv'
accuracy_per_class_df.to_csv(accuracy_per_class_df_path,index=False)

"""
Generate two graphs for BERT model evaluation metrics. 
1. F1_scores_graph: Trained BERT model F1 scores(macro, micro and weighted), train and validation losses. 
2. prediction_accuracy_graph: Trained BERT model prediction accuracy of individual classes.
"""

F1_scores_graph, prediction_accuracy_graph = prepare_metrics_graphs(df_f1_scores_long, 
                                                                    accuracy_per_class_df,
                                                                    domain, 
                                                                    width, 
                                                                    height, 
                                                                    epochs)
print("")   
F1_scores_graph

 
Predict emotion for 'Sản phẩm rất tốt, và mạnh mẽ':  positive
Predict emotion for 'Tôi cần ai đó hỗ trợ tôi cách sử dụng':  positive
Predict emotion for 'giao hàng quá chậm':  negative
Prediction accuracy for individual class:
Class: negative
Accuracy: 1050/1050,  1.000 

Class: neutral
Accuracy: 53/54,  0.981 

Class: positive
Accuracy: 1042/1050,  0.992 




In [None]:
prediction_accuracy_graph

In [None]:
# pre_trained_model: 'trituenhantaoio/bert-base-vietnamese-uncased' 
# use customer reviews label data before oversampling negative class

In [None]:
# Create reviews label dateset
df = set_data_category_in_df(train_review_df_before_oversample) 
print("\ndf['label'].value_counts():")
print(df['label'].value_counts())

# Set BERT model parameters and BERT model evaluation dataframe value.
hidden_dropout_prob = 0.1
attention_probs_dropout_prob = 0.1
pre_trained_model = 'trituenhantaoio/bert-base-vietnamese-uncased'
batch_size = 16
epochs = 10
Ir = 1e-5
eps = 1e-8
train_data_provider = 'Yunhong He'
model_type = pre_trained_model.split('/')[0]
train_data_type = 'Yunhong Keyword Search'
train_data_type_list.append(train_data_type)

# Create tokenizer
tokenizer = BertTokenizer.from_pretrained(pre_trained_model, do_lower_case=True)

# Build train and validation dataloader
dataloader_train, dataloader_validation = build_dataloader(df, batch_size)

# Build and train BERT model. Generate BERT model and dataset eval_df for model evaluation metrics
model = build_Bert_model(pre_trained_model, attention_probs_dropout_prob, hidden_dropout_prob)
eval_df, model = train_model(Ir, eps, epochs, batch_size, dataloader_validation)

# Save eval_df for building visualization later on
eval_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_NLP_Epoch{epochs}_train_data_provided_by_{train_data_provider}_eval_before_oversample_df.csv'
eval_df.to_csv(eval_df_path, index=False)

# Use dataloader_validation to generate predictions and true_vals. 
# Create dataframe accuracy_per_class_df to store BERT model prediction accuracy of individual class.
_, predictions, true_vals = evaluate(dataloader_validation)
accuracy_per_class_df = accuracy_per_class(predictions, true_vals)
accuracy_per_class_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_accuracy_per_class_before_oversample_df.csv'
accuracy_per_class_df.to_csv(accuracy_per_class_df_path,index=False)

# Save trained BERT model.
model_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_before_oversample.model'
torch.save(model.state_dict(),model_path )

# Calculate best epoch for the highest F1 macro score of the trained BERT model.
best_epoch_F1_score_macro = eval_df[ eval_df['F1_score_macro']==max(eval_df['F1_score_macro'])]['epoch'].values[0]
print('best epoch for the best F1 score (macro): ',best_epoch_F1_score_macro)

# Append eval_df path to eval_df_path_list, accuracy_per_class_df path to accuracy_per_class_path_list 
# trained BERT model path to model_path_list, and best epoch for the highest F1 macro socre to best_epoch_F1_score_macro_list.
eval_df_path_list.append(eval_df_path)
accuracy_per_class_path_list.append(accuracy_per_class_df_path)
model_path_list.append(model_path)
best_epoch_F1_score_macro_list.append(best_epoch_F1_score_macro)


df['label'].value_counts():
2    5250
0     499
1     269
Name: label, dtype: int64


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at trituenhantaoio/bert-base-vietnamese-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


cuda


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/301 [00:00<?, ?it/s]


Epoch 1
Training loss: 0.1675946513535857
Validation loss: 0.06994616932780973
F1 Score (Weighted): 0.9838811433234909
F1 Score (macro): 0.9638381837334716
F1 Score (micro): 0.9842192691029901


Epoch 2:   0%|          | 0/301 [00:00<?, ?it/s]


Epoch 2
Training loss: 0.06619871094702214
Validation loss: 0.05624172007389318
F1 Score (Weighted): 0.9852461361766727
F1 Score (macro): 0.9680084568948729
F1 Score (micro): 0.9850498338870431


Epoch 3:   0%|          | 0/301 [00:00<?, ?it/s]


Epoch 3
Training loss: 0.029229638547202622
Validation loss: 0.0912165901436789
F1 Score (Weighted): 0.9838811433234909
F1 Score (macro): 0.9638381837334716
F1 Score (micro): 0.9842192691029901


Epoch 4:   0%|          | 0/301 [00:00<?, ?it/s]


Epoch 4
Training loss: 0.010714272311942781
Validation loss: 0.08347775405950654
F1 Score (Weighted): 0.9846918353969352
F1 Score (macro): 0.9655774507358444
F1 Score (micro): 0.9850498338870431


Epoch 5:   0%|          | 0/301 [00:00<?, ?it/s]


Epoch 5
Training loss: 0.00822142904391899
Validation loss: 0.07064498836189159
F1 Score (Weighted): 0.987396600527021
F1 Score (macro): 0.9719836774468603
F1 Score (micro): 0.9875415282392026


Epoch 6:   0%|          | 0/301 [00:00<?, ?it/s]


Epoch 6
Training loss: 0.0035225203202958483
Validation loss: 0.08494828473818702
F1 Score (Weighted): 0.9867109634551495
F1 Score (macro): 0.9707936507936509
F1 Score (micro): 0.9867109634551495


Epoch 7:   0%|          | 0/301 [00:00<?, ?it/s]


Epoch 7
Training loss: 0.004402882197984546
Validation loss: 0.07699365727383242
F1 Score (Weighted): 0.9875131904372839
F1 Score (macro): 0.9724945527255507
F1 Score (micro): 0.9875415282392026


Epoch 8:   0%|          | 0/301 [00:00<?, ?it/s]


Epoch 8
Training loss: 0.0007597429970961197
Validation loss: 0.09608362423645495
F1 Score (Weighted): 0.9875131904372839
F1 Score (macro): 0.9724945527255507
F1 Score (micro): 0.9875415282392026


Epoch 9:   0%|          | 0/301 [00:00<?, ?it/s]


Epoch 9
Training loss: 0.0006381429762206662
Validation loss: 0.0981103998280416
F1 Score (Weighted): 0.9875131904372839
F1 Score (macro): 0.9724945527255507
F1 Score (micro): 0.9875415282392026


Epoch 10:   0%|          | 0/301 [00:00<?, ?it/s]


Epoch 10
Training loss: 0.0004934891848795911
Validation loss: 0.09858242438808941
F1 Score (Weighted): 0.9875131904372839
F1 Score (macro): 0.9724945527255507
F1 Score (micro): 0.9875415282392026
Prediction accuracy for individual class:
Class: negative
Accuracy: 92/100,  0.920 

Class: neutral
Accuracy: 54/54,  1.000 

Class: positive
Accuracy: 1043/1050,  0.993 

best epoch for the best F1 score (macro):  7


In [None]:
# Create dataframe model_df with trained BERT model performance metrics.
model_df = prepare_model_df(eval_df_path_list, accuracy_per_class_path_list, model_path_list)

# Set width, height, domain for BERT model evaluation graph.
width, height, domain = 200, 200, (0,1)

# Generate dataframe df_f1_scores_long which is a long form of dataset for BERT model F1 scores(macro, micro and weighted), train and validation losses.
df_f1_scores_long = prepare_data_for_metrics_graphs(eval_df, best_model=False)

# Predict emotions for sample customer reviews.
predict_emotion_test()

"""
Generate two graphs for BERT model evaluation metrics. 
1. F1_scores_graph: Trained BERT model F1 scores(macro, micro and weighted), train and validation losses. 
2. prediction_accuracy_graph: Trained BERT model prediction accuracy of individual classes.
"""


F1_scores_graph, prediction_accuracy_graph = prepare_metrics_graphs(df_f1_scores_long, 
                                                                    accuracy_per_class_df,
                                                                    domain, 
                                                                    width, 
                                                                    height, 
                                                                    epochs)
print("")   
F1_scores_graph

 
Predict emotion for 'Sản phẩm rất tốt, và mạnh mẽ':  positive
Predict emotion for 'Tôi cần ai đó hỗ trợ tôi cách sử dụng':  positive
Predict emotion for 'giao hàng quá chậm':  negative



In [None]:
prediction_accuracy_graph

In [None]:
accuracy_per_class_df

Unnamed: 0,class,score,pre_trained_model,epoch,train_data_creator,train_data_type
0,negative,0.92,trituenhantaoio,10,Yunhong He,Yunhong Keyword Search
1,neutral,1.0,trituenhantaoio,10,Yunhong He,Yunhong Keyword Search
2,positive,0.993333,trituenhantaoio,10,Yunhong He,Yunhong Keyword Search


In [None]:
# 'trituenhantaoio/bert-base-vietnamese-uncased' Bert Model with best F1 score macro
# use customer reviews label data before oversampling negative class

In [None]:
# Build BERT model.
model = build_Bert_model(pre_trained_model,attention_probs_dropout_prob,hidden_dropout_prob)

# Let PyTorch to use the currently available device to load a tensor into memory.
model.to(device)

# Find the path as best_model_path for the BERT model trained at the best epoch at which the model has highest F1 macro score.
epoch = best_epoch_F1_score_macro
best_model_path = f'emotion_{model_type}_NLP_{epoch}.model'

# Load the best trained BERT model.
model.load_state_dict(torch.load(best_model_path, map_location=torch.device('cpu')))

# Save the best trained BERT model.
epochs = best_epoch_F1_score_macro
model_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_before_oversample.model'
torch.save(model.state_dict(),model_path )

# Append the path of best trained BERT model to best_model_path_list. 
best_model_path_list.append(model_path)

# Add a column as best_model_path in model_df to store best_model_path_list.
model_df['best_model_path'] = best_model_path_list

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at trituenhantaoio/bert-base-vietnamese-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Predict the emotion of the sample of customer reviews.
predict_emotion_test()

# Generate dataframe df_f1_scores_long as long form of dataset for the Best Trained BERT model performance metrics.
df_f1_scores_long = prepare_data_for_metrics_graphs(eval_df, best_model=True)

# Use dataloader_validation to generate predictions and true_vals. 
# Create dataframe accuracy_per_class_df to store BERT model prediction accuracy of individual class.
_, predictions, true_vals = evaluate(dataloader_validation)
accuracy_per_class_df = accuracy_per_class(predictions, true_vals)

# Save accuracy_per_class_df.
accuracy_per_class_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_accuracy_per_class_before_oversample_df.csv'
accuracy_per_class_df.to_csv(accuracy_per_class_df_path,index=False)

"""
Generate two graphs for BERT model evaluation metrics. 
1. F1_scores_graph: Trained BERT model F1 scores(macro, micro and weighted), train and validation losses. 
2. prediction_accuracy_graph: Trained BERT model prediction accuracy of individual classes.
"""

F1_scores_graph, prediction_accuracy_graph = prepare_metrics_graphs(df_f1_scores_long, 
                                                                    accuracy_per_class_df,
                                                                    domain, 
                                                                    width, 
                                                                    height, 
                                                                    epochs)
print("")   
F1_scores_graph

 
Predict emotion for 'Sản phẩm rất tốt, và mạnh mẽ':  positive
Predict emotion for 'Tôi cần ai đó hỗ trợ tôi cách sử dụng':  negative
Predict emotion for 'giao hàng quá chậm':  negative
Prediction accuracy for individual class:
Class: negative
Accuracy: 92/100,  0.920 

Class: neutral
Accuracy: 54/54,  1.000 

Class: positive
Accuracy: 1043/1050,  0.993 




In [None]:
prediction_accuracy_graph

In [None]:
# Below train data was provided by Suwasit's Vietnamese Team

In [None]:
train_review_df_s = pd.read_excel(f'drive/MyDrive/Realtime Dreamer/Tefal Lazada Product Reviews in TTL202207_Updated_Good_Bad.xlsx',
                                  engine='openpyxl', sheet_name='Database_LZD', skiprows=0)
train_review_df_s = train_review_df_s.rename(columns={'Review Content':'Vietnamese'}).reset_index()
emotion_dict = {'Good':'positive','Bad':'negative'}
train_review_df_s['emotion'] = train_review_df_s['Comment classified Type 1'].replace(emotion_dict)
train_review_df_s = train_review_df_s[['index','Vietnamese','Rating','emotion']][train_review_df_s['Vietnamese'].isnull()==False]
train_review_df_s['emotion'].fillna('neutral',inplace=True)
print(train_review_df_s.shape)

(22771, 4)


In [None]:
# pre_trained_model: 'trituenhantaoio/bert-base-vietnamese-uncased'

In [None]:
# Create reviews label dateset provided by Suwasit's Vietnamese team.
train_review_df = train_review_df_s.copy()
df = set_data_category_in_df(add_label_to_df(train_review_df))
print("\ndf['label'].value_counts():")
print(df['label'].value_counts())

# Set BERT model parameters and BERT model evaluation dataframe value. 
hidden_dropout_prob = 0.1
attention_probs_dropout_prob = 0.1
pre_trained_model = 'trituenhantaoio/bert-base-vietnamese-uncased'
model_type = pre_trained_model.split('/')[0]
batch_size = 16
epochs = 10
Ir = 1e-5
eps = 1e-8
train_data_provider = 'Suwasit'
train_data_type = 'Reviews-Suwasit Viet Team'
train_data_type_list.append(train_data_type)

# Create tokenizer
tokenizer = BertTokenizer.from_pretrained(pre_trained_model, do_lower_case=True)

# Build train and validation dataloader
dataloader_train, dataloader_validation = build_dataloader(df, batch_size)

# Build and train BERT model. Generate BERT model and dataset eval_df for model evaluation metrics
model = build_Bert_model(pre_trained_model, attention_probs_dropout_prob, hidden_dropout_prob)
eval_df, model = train_model(Ir, eps, epochs, batch_size, dataloader_validation)

# Save eval_df for building visualization later on
eval_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_NLP_Epoch{epochs}_train_data_provided_by_{train_data_provider}_eval_df.csv'
eval_df.to_csv(eval_df_path, index=False)

# Use dataloader_validation to generate predictions and true_vals. 
# Create dataframe accuracy_per_class_df to store BERT model prediction accuracy of individual class.
_, predictions, true_vals = evaluate(dataloader_validation)
accuracy_per_class_df = accuracy_per_class(predictions, true_vals)
accuracy_per_class_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_accuracy_per_class_df.csv'
accuracy_per_class_df.to_csv(accuracy_per_class_df_path,index=False)

# Save trained BERT model.
model_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}.model'
torch.save(model.state_dict(),model_path )

# Calculate best epoch for the highest F1 macro score of the trained BERT model.
best_epoch_F1_score_macro = eval_df[ eval_df['F1_score_macro']==max(eval_df['F1_score_macro'])]['epoch'].values[0]
print('best epoch for the best F1 score (macro): ',best_epoch_F1_score_macro)

# Append eval_df path to eval_df_path_list, accuracy_per_class_df path to accuracy_per_class_path_list 
# trained BERT model path to model_path_list, and best epoch for the highest F1 macro socre to best_epoch_F1_score_macro_list.
eval_df_path_list.append(eval_df_path)
accuracy_per_class_path_list.append(accuracy_per_class_df_path)
model_path_list.append(model_path)
best_epoch_F1_score_macro_list.append(best_epoch_F1_score_macro)


df['label'].value_counts():
2    11271
1    10641
0      859
Name: label, dtype: int64


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at trituenhantaoio/bert-base-vietnamese-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


cuda


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 1
Training loss: 0.21214111861580684
Validation loss: 0.1641000641019721
F1 Score (Weighted): 0.9414219040845545
F1 Score (macro): 0.8227867322728342
F1 Score (micro): 0.9418221734357849


Epoch 2:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 2
Training loss: 0.1440958136252534
Validation loss: 0.174246416971692
F1 Score (Weighted): 0.9448215722975838
F1 Score (macro): 0.8275322097250096
F1 Score (micro): 0.9466520307354556


Epoch 3:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 3
Training loss: 0.10850966894740355
Validation loss: 0.21282182361730456
F1 Score (Weighted): 0.9434517774362263
F1 Score (macro): 0.8223232657760379
F1 Score (micro): 0.9433589462129529


Epoch 4:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 4
Training loss: 0.07515193546210074
Validation loss: 0.2461972324374712
F1 Score (Weighted): 0.9461564943789281
F1 Score (macro): 0.8329463878813254
F1 Score (micro): 0.9448957189901207


Epoch 5:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 5
Training loss: 0.052014522626032726
Validation loss: 0.2954009263325644
F1 Score (Weighted): 0.9452489332745371
F1 Score (macro): 0.8273677342371851
F1 Score (micro): 0.9453347969264544


Epoch 6:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 6
Training loss: 0.0395626844615639
Validation loss: 0.32323767518608837
F1 Score (Weighted): 0.9456568066987731
F1 Score (macro): 0.8285651807870821
F1 Score (micro): 0.9464324917672888


Epoch 7:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 7
Training loss: 0.027126217575097353
Validation loss: 0.3427034601610789
F1 Score (Weighted): 0.9467517377424849
F1 Score (macro): 0.8347344309296604
F1 Score (micro): 0.9453347969264544


Epoch 8:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 8
Training loss: 0.02336515097900682
Validation loss: 0.34956246083064907
F1 Score (Weighted): 0.9464571947552988
F1 Score (macro): 0.833380849965211
F1 Score (micro): 0.9466520307354556


Epoch 9:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 9
Training loss: 0.017196030857794652
Validation loss: 0.3651320127810131
F1 Score (Weighted): 0.9470369712032527
F1 Score (macro): 0.8338107233827565
F1 Score (micro): 0.9475301866081229


Epoch 10:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 10
Training loss: 0.013655793285378198
Validation loss: 0.37041010835810884
F1 Score (Weighted): 0.9480469056635418
F1 Score (macro): 0.8389039306365943
F1 Score (micro): 0.9477497255762898
Prediction accuracy for individual class:
Class: negative
Accuracy: 104/172,  0.605 

Class: neutral
Accuracy: 2037/2128,  0.957 

Class: positive
Accuracy: 2176/2255,  0.965 

best epoch for the best F1 score (macro):  10


In [None]:
# Create dataframe model_df with trained BERT model performance metrics.
model_df = prepare_model_df(eval_df_path_list, accuracy_per_class_path_list, model_path_list)

# Set width, height, domain for BERT model evaluation graph.
width, height, domain = 200, 200, (0,1)

# Generate dataframe df_f1_scores_long which is a long form of dataset for BERT model F1 scores(macro, micro and weighted), train and validation losses.
df_f1_scores_long = prepare_data_for_metrics_graphs(eval_df, best_model=False)

# Predict emotions for sample customer reviews.
predict_emotion_test()

"""
Generate two graphs for BERT model evaluation metrics. 
1. F1_scores_graph: Trained BERT model F1 scores(macro, micro and weighted), train and validation losses. 
2. prediction_accuracy_graph: Trained BERT model prediction accuracy of individual classes.
"""


F1_scores_graph, prediction_accuracy_graph = prepare_metrics_graphs(df_f1_scores_long, 
                                                                    accuracy_per_class_df,
                                                                    domain, 
                                                                    width, 
                                                                    height, 
                                                                    epochs)
print("")   
F1_scores_graph

 
Predict emotion for 'Sản phẩm rất tốt, và mạnh mẽ':  positive
Predict emotion for 'Tôi cần ai đó hỗ trợ tôi cách sử dụng':  neutral
Predict emotion for 'giao hàng quá chậm':  negative



In [None]:
prediction_accuracy_graph

In [None]:
# 'trituenhantaoio/bert-base-vietnamese-uncased' Bert Model with best F1 score macro

In [None]:
# Build BERT model.
model = build_Bert_model(pre_trained_model,attention_probs_dropout_prob,hidden_dropout_prob)

# Let PyTorch to use the currently available device to load a tensor into memory.
model.to(device)

# Find the path as best_model_path for the BERT model trained at the best epoch at which the model has highest F1 macro score.
epoch = best_epoch_F1_score_macro
best_model_path = f'emotion_{model_type}_NLP_{epoch}.model'

# Load the best trained BERT model.
model.load_state_dict(torch.load(best_model_path, map_location=torch.device('cpu')))

# Save the best trained BERT model.
epochs = best_epoch_F1_score_macro
model_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}.model'
torch.save(model.state_dict(),model_path )

# Append the path of best trained BERT model to best_model_path_list. 
best_model_path_list.append(model_path)

# Add a column as best_model_path in model_df to store best_model_path_list.
model_df['best_model_path'] = best_model_path_list

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at trituenhantaoio/bert-base-vietnamese-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Predict the emotion of the sample of customer reviews.
predict_emotion_test()

# Generate dataframe df_f1_scores_long as long form of dataset for the Best Trained BERT model performance metrics.
df_f1_scores_long = prepare_data_for_metrics_graphs(eval_df, best_model=True)

# Use dataloader_validation to generate predictions and true_vals. 
# Create dataframe accuracy_per_class_df to store BERT model prediction accuracy of individual class.
_, predictions, true_vals = evaluate(dataloader_validation)
accuracy_per_class_df = accuracy_per_class(predictions, true_vals)

# Save accuracy_per_class_df.
accuracy_per_class_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_accuracy_per_class_df.csv'
accuracy_per_class_df.to_csv(accuracy_per_class_df_path,index=False)

"""
Generate two graphs for BERT model evaluation metrics. 
1. F1_scores_graph: Trained BERT model F1 scores(macro, micro and weighted), train and validation losses. 
2. prediction_accuracy_graph: Trained BERT model prediction accuracy of individual classes.
"""

F1_scores_graph, prediction_accuracy_graph = prepare_metrics_graphs(df_f1_scores_long, 
                                                                    accuracy_per_class_df,
                                                                    domain, 
                                                                    width, 
                                                                    height, 
                                                                    epochs)
print("")   
F1_scores_graph

 
Predict emotion for 'Sản phẩm rất tốt, và mạnh mẽ':  positive
Predict emotion for 'Tôi cần ai đó hỗ trợ tôi cách sử dụng':  neutral
Predict emotion for 'giao hàng quá chậm':  negative
Prediction accuracy for individual class:
Class: negative
Accuracy: 104/172,  0.605 

Class: neutral
Accuracy: 2037/2128,  0.957 

Class: positive
Accuracy: 2176/2255,  0.965 




In [None]:
prediction_accuracy_graph

In [None]:
# pre_trained_model: 'NlpHUST/vibert4news-base-cased'

In [None]:
# Create reviews label dateset provided by Suwasit's Vietnamese team.
train_review_df = train_review_df_s.copy()
df = set_data_category_in_df(add_label_to_df(train_review_df))
print("\ndf['label'].value_counts():")
print(df['label'].value_counts())

# Set BERT model parameters and BERT model evaluation dataframe value. 
hidden_dropout_prob = 0.1
attention_probs_dropout_prob = 0.1
pre_trained_model = 'NlpHUST/vibert4news-base-cased'
model_type = pre_trained_model.split('/')[0]
batch_size = 16
epochs = 10
Ir = 1e-5
eps = 1e-8
train_data_provider = 'Suwasit'
train_data_type = 'Reviews-Suwasit Viet Team'
train_data_type_list.append(train_data_type)

# Create tokenizer
tokenizer = BertTokenizer.from_pretrained(pre_trained_model, do_lower_case=True)

# Build train and validation dataloader
dataloader_train, dataloader_validation = build_dataloader(df, batch_size)

# Build and train BERT model. Generate BERT model and dataset eval_df for model evaluation metrics
model = build_Bert_model(pre_trained_model, attention_probs_dropout_prob, hidden_dropout_prob)
eval_df, model = train_model(Ir, eps, epochs, batch_size, dataloader_validation)

# Save eval_df for building visualization later on
eval_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_NLP_Epoch{epochs}_train_data_provided_by_{train_data_provider}_eval_df.csv'
eval_df.to_csv(eval_df_path, index=False)

# Use dataloader_validation to generate predictions and true_vals. 
# Create dataframe accuracy_per_class_df to store BERT model prediction accuracy of individual class.
_, predictions, true_vals = evaluate(dataloader_validation)
accuracy_per_class_df = accuracy_per_class(predictions, true_vals)
accuracy_per_class_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_accuracy_per_class_df.csv'
accuracy_per_class_df.to_csv(accuracy_per_class_df_path,index=False)

# Save trained BERT model.
model_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}.model'
torch.save(model.state_dict(),model_path )

# Calculate best epoch for the highest F1 macro score of the trained BERT model.
best_epoch_F1_score_macro = eval_df[ eval_df['F1_score_macro']==max(eval_df['F1_score_macro'])]['epoch'].values[0]
print('best epoch for the best F1 score (macro): ',best_epoch_F1_score_macro)

# Append eval_df path to eval_df_path_list, accuracy_per_class_df path to accuracy_per_class_path_list 
# trained BERT model path to model_path_list, and best epoch for the highest F1 macro socre to best_epoch_F1_score_macro_list.
eval_df_path_list.append(eval_df_path)
accuracy_per_class_path_list.append(accuracy_per_class_df_path)
model_path_list.append(model_path)
best_epoch_F1_score_macro_list.append(best_epoch_F1_score_macro)


df['label'].value_counts():
2    11271
1    10641
0      859
Name: label, dtype: int64


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'RobertaTokenizer'. 
The class this function is called from is 'BertTokenizer'.
Some weights of the model checkpoint at NlpHUST/vibert4news-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are ini

cuda


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 1
Training loss: 0.2383132414447505
Validation loss: 0.21876210953934086
F1 Score (Weighted): 0.9291970186508455
F1 Score (macro): 0.8035426682735313
F1 Score (micro): 0.9321624588364434


Epoch 2:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 2
Training loss: 0.1627911491005962
Validation loss: 0.19198702354629507
F1 Score (Weighted): 0.9393789896815974
F1 Score (macro): 0.8272501945507313
F1 Score (micro): 0.9365532381997803


Epoch 3:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 3
Training loss: 0.13059068262892676
Validation loss: 0.19813696666244876
F1 Score (Weighted): 0.9426583093232053
F1 Score (macro): 0.8261158702370377
F1 Score (micro): 0.9455543358946213


Epoch 4:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 4
Training loss: 0.10164462758817516
Validation loss: 0.23693640487883322
F1 Score (Weighted): 0.942209851472814
F1 Score (macro): 0.8271454514515488
F1 Score (micro): 0.9413830954994512


Epoch 5:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 5
Training loss: 0.07797285255971831
Validation loss: 0.27734781394839253
F1 Score (Weighted): 0.9405753816447481
F1 Score (macro): 0.8249318478686303
F1 Score (micro): 0.937211855104281


Epoch 6:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 6
Training loss: 0.05702098067261154
Validation loss: 0.304900037408643
F1 Score (Weighted): 0.9404337205224588
F1 Score (macro): 0.8278348785068235
F1 Score (micro): 0.9391877058177827


Epoch 7:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 7
Training loss: 0.04604181447265545
Validation loss: 0.3324974156195439
F1 Score (Weighted): 0.9421905342551964
F1 Score (macro): 0.8337940160054629
F1 Score (micro): 0.9418221734357849


Epoch 8:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 8
Training loss: 0.037207998415563104
Validation loss: 0.37582991279170463
F1 Score (Weighted): 0.9405172562778628
F1 Score (macro): 0.8283035516910596
F1 Score (micro): 0.938748627881449


Epoch 9:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 9
Training loss: 0.027520978361175466
Validation loss: 0.3639526538981667
F1 Score (Weighted): 0.9429343736259003
F1 Score (macro): 0.8390578572267754
F1 Score (micro): 0.9429198682766191


Epoch 10:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 10
Training loss: 0.022549846195634226
Validation loss: 0.37562972189528715
F1 Score (Weighted): 0.9421217972598572
F1 Score (macro): 0.8347893882745031
F1 Score (micro): 0.9418221734357849
Prediction accuracy for individual class:
Class: negative
Accuracy: 104/172,  0.605 

Class: neutral
Accuracy: 2010/2128,  0.945 

Class: positive
Accuracy: 2176/2255,  0.965 

best epoch for the best F1 score (macro):  9


In [None]:
# Create dataframe model_df with trained BERT model performance metrics.
model_df = prepare_model_df(eval_df_path_list, accuracy_per_class_path_list, model_path_list)

# Set width, height, domain for BERT model evaluation graph.
width, height, domain = 200, 200, (0,1)

# Generate dataframe df_f1_scores_long which is a long form of dataset for BERT model F1 scores(macro, micro and weighted), train and validation losses.
df_f1_scores_long = prepare_data_for_metrics_graphs(eval_df, best_model=False)

# Predict emotions for sample customer reviews.
predict_emotion_test()

"""
Generate two graphs for BERT model evaluation metrics. 
1. F1_scores_graph: Trained BERT model F1 scores(macro, micro and weighted), train and validation losses. 
2. prediction_accuracy_graph: Trained BERT model prediction accuracy of individual classes.
"""


F1_scores_graph, prediction_accuracy_graph = prepare_metrics_graphs(df_f1_scores_long, 
                                                                    accuracy_per_class_df,
                                                                    domain, 
                                                                    width, 
                                                                    height, 
                                                                    epochs)
print("")   
F1_scores_graph

 
Predict emotion for 'Sản phẩm rất tốt, và mạnh mẽ':  positive
Predict emotion for 'Tôi cần ai đó hỗ trợ tôi cách sử dụng':  neutral
Predict emotion for 'giao hàng quá chậm':  negative



In [None]:
prediction_accuracy_graph

In [None]:
# 'NlpHUST/vibert4news-base-cased' Bert Model with best F1 score macro

In [None]:
# Build BERT model.
model = build_Bert_model(pre_trained_model,attention_probs_dropout_prob,hidden_dropout_prob)

# Let PyTorch to use the currently available device to load a tensor into memory.
model.to(device)

# Find the path as best_model_path for the BERT model trained at the best epoch at which the model has highest F1 macro score.
epoch = best_epoch_F1_score_macro
best_model_path = f'emotion_{model_type}_NLP_{epoch}.model'

# Load the best trained BERT model.
model.load_state_dict(torch.load(best_model_path, map_location=torch.device('cpu')))

# Save the best trained BERT model.
epochs = best_epoch_F1_score_macro
model_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}.model'
torch.save(model.state_dict(),model_path )

# Append the path of best trained BERT model to best_model_path_list. 
best_model_path_list.append(model_path)

# Add a column as best_model_path in model_df to store best_model_path_list.
model_df['best_model_path'] = best_model_path_list

Some weights of the model checkpoint at NlpHUST/vibert4news-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

In [None]:
# Predict the emotion of the sample of customer reviews.
predict_emotion_test()

# Generate dataframe df_f1_scores_long as long form of dataset for the Best Trained BERT model performance metrics.
df_f1_scores_long = prepare_data_for_metrics_graphs(eval_df, best_model=True)

# Use dataloader_validation to generate predictions and true_vals. 
# Create dataframe accuracy_per_class_df to store BERT model prediction accuracy of individual class.
_, predictions, true_vals = evaluate(dataloader_validation)
accuracy_per_class_df = accuracy_per_class(predictions, true_vals)

# Save accuracy_per_class_df.
accuracy_per_class_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_accuracy_per_class_df.csv'
accuracy_per_class_df.to_csv(accuracy_per_class_df_path,index=False)

"""
Generate two graphs for BERT model evaluation metrics. 
1. F1_scores_graph: Trained BERT model F1 scores(macro, micro and weighted), train and validation losses. 
2. prediction_accuracy_graph: Trained BERT model prediction accuracy of individual classes.
"""

F1_scores_graph, prediction_accuracy_graph = prepare_metrics_graphs(df_f1_scores_long, 
                                                                    accuracy_per_class_df,
                                                                    domain, 
                                                                    width, 
                                                                    height, 
                                                                    epochs)
print("")   
F1_scores_graph

 
Predict emotion for 'Sản phẩm rất tốt, và mạnh mẽ':  positive
Predict emotion for 'Tôi cần ai đó hỗ trợ tôi cách sử dụng':  neutral
Predict emotion for 'giao hàng quá chậm':  negative
Prediction accuracy for individual class:
Class: negative
Accuracy: 104/172,  0.605 

Class: neutral
Accuracy: 2021/2128,  0.950 

Class: positive
Accuracy: 2170/2255,  0.962 




In [None]:
prediction_accuracy_graph

In [None]:
# pre_trained_model: 'bert-base-uncased'

In [None]:
# Create reviews label dateset provided by Suwasit's Vietnamese team.
train_review_df = train_review_df_s.copy()
df = set_data_category_in_df(add_label_to_df(train_review_df))
print("\ndf['label'].value_counts():")
print(df['label'].value_counts())

# Set BERT model parameters and BERT model evaluation dataframe value. 
hidden_dropout_prob = 0.1
attention_probs_dropout_prob = 0.1
pre_trained_model = 'bert-base-uncased'
model_type = pre_trained_model.split('/')[0]
batch_size = 16
epochs = 10
Ir = 1e-5
eps = 1e-8
train_data_provider = 'Suwasit'
train_data_type = 'Reviews-Suwasit Viet Team'
train_data_type_list.append(train_data_type)

# Create tokenizer
tokenizer = BertTokenizer.from_pretrained(pre_trained_model, do_lower_case=True)

# Build train and validation dataloader
dataloader_train, dataloader_validation = build_dataloader(df, batch_size)

# Build and train BERT model. Generate BERT model and dataset eval_df for model evaluation metrics
model = build_Bert_model(pre_trained_model, attention_probs_dropout_prob, hidden_dropout_prob)
eval_df, model = train_model(Ir, eps, epochs, batch_size, dataloader_validation)

# Save eval_df for building visualization later on
eval_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_NLP_Epoch{epochs}_train_data_provided_by_{train_data_provider}_eval_df.csv'
eval_df.to_csv(eval_df_path, index=False)

# Use dataloader_validation to generate predictions and true_vals. 
# Create dataframe accuracy_per_class_df to store BERT model prediction accuracy of individual class.
_, predictions, true_vals = evaluate(dataloader_validation)
accuracy_per_class_df = accuracy_per_class(predictions, true_vals)
accuracy_per_class_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_accuracy_per_class_df.csv'
accuracy_per_class_df.to_csv(accuracy_per_class_df_path,index=False)

# Save trained BERT model.
model_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}.model'
torch.save(model.state_dict(),model_path )

# Calculate best epoch for the highest F1 macro score of the trained BERT model.
best_epoch_F1_score_macro = eval_df[ eval_df['F1_score_macro']==max(eval_df['F1_score_macro'])]['epoch'].values[0]
print('best epoch for the best F1 score (macro): ',best_epoch_F1_score_macro)

# Append eval_df path to eval_df_path_list, accuracy_per_class_df path to accuracy_per_class_path_list 
# trained BERT model path to model_path_list, and best epoch for the highest F1 macro socre to best_epoch_F1_score_macro_list.
eval_df_path_list.append(eval_df_path)
accuracy_per_class_path_list.append(accuracy_per_class_df_path)
model_path_list.append(model_path)
best_epoch_F1_score_macro_list.append(best_epoch_F1_score_macro)


df['label'].value_counts():
2    11271
1    10641
0      859
Name: label, dtype: int64


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

cuda


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 1
Training loss: 0.2641873438041281
Validation loss: 0.21834684072160407
F1 Score (Weighted): 0.9287294175162258
F1 Score (macro): 0.7758200429185429
F1 Score (micro): 0.9334796926454446


Epoch 2:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 2
Training loss: 0.18041471182205163
Validation loss: 0.16396065974411997
F1 Score (Weighted): 0.9366755586789731
F1 Score (macro): 0.8041269270223191
F1 Score (micro): 0.9391877058177827


Epoch 3:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 3
Training loss: 0.15365719529628408
Validation loss: 0.20246641065010376
F1 Score (Weighted): 0.9381606513896674
F1 Score (macro): 0.8181636984346845
F1 Score (micro): 0.9347969264544457


Epoch 4:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 4
Training loss: 0.13093550854590064
Validation loss: 0.19575740146029968
F1 Score (Weighted): 0.9431492558092733
F1 Score (macro): 0.8367716411719938
F1 Score (micro): 0.9420417124039517


Epoch 5:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 5
Training loss: 0.10908955375741512
Validation loss: 0.2378275172973844
F1 Score (Weighted): 0.9434404271451162
F1 Score (macro): 0.8383169083943178
F1 Score (micro): 0.9433589462129529


Epoch 6:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 6
Training loss: 0.09509179219792113
Validation loss: 0.26204772568797985
F1 Score (Weighted): 0.9398176010247637
F1 Score (macro): 0.8292442857507805
F1 Score (micro): 0.94006586169045


Epoch 7:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 7
Training loss: 0.0740721034192649
Validation loss: 0.2950919495508475
F1 Score (Weighted): 0.9400404090680823
F1 Score (macro): 0.8251222461748778
F1 Score (micro): 0.9391877058177827


Epoch 8:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 8
Training loss: 0.06050803882947447
Validation loss: 0.3249009335363537
F1 Score (Weighted): 0.9373041115283858
F1 Score (macro): 0.8163005748115877
F1 Score (micro): 0.937211855104281


Epoch 9:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 9
Training loss: 0.053655482594291655
Validation loss: 0.33875315198399075
F1 Score (Weighted): 0.9379394492137435
F1 Score (macro): 0.8220598237525308
F1 Score (micro): 0.9367727771679473


Epoch 10:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 10
Training loss: 0.04378804753599807
Validation loss: 0.34255669197849337
F1 Score (Weighted): 0.9388724512427751
F1 Score (macro): 0.8232610956258258
F1 Score (micro): 0.9385290889132821
Prediction accuracy for individual class:
Class: negative
Accuracy: 99/172,  0.576 

Class: neutral
Accuracy: 2015/2128,  0.947 

Class: positive
Accuracy: 2161/2255,  0.958 

best epoch for the best F1 score (macro):  5


In [None]:
# Create dataframe model_df with trained BERT model performance metrics.
model_df = prepare_model_df(eval_df_path_list, accuracy_per_class_path_list, model_path_list)

# Set width, height, domain for BERT model evaluation graph.
width, height, domain = 200, 200, (0,1)

# Generate dataframe df_f1_scores_long which is a long form of dataset for BERT model F1 scores(macro, micro and weighted), train and validation losses.
df_f1_scores_long = prepare_data_for_metrics_graphs(eval_df, best_model=False)

# Predict emotions for sample customer reviews.
predict_emotion_test()

"""
Generate two graphs for BERT model evaluation metrics. 
1. F1_scores_graph: Trained BERT model F1 scores(macro, micro and weighted), train and validation losses. 
2. prediction_accuracy_graph: Trained BERT model prediction accuracy of individual classes.
"""


F1_scores_graph, prediction_accuracy_graph = prepare_metrics_graphs(df_f1_scores_long, 
                                                                    accuracy_per_class_df,
                                                                    domain, 
                                                                    width, 
                                                                    height, 
                                                                    epochs)
print("")   
F1_scores_graph

 
Predict emotion for 'Sản phẩm rất tốt, và mạnh mẽ':  positive
Predict emotion for 'Tôi cần ai đó hỗ trợ tôi cách sử dụng':  neutral
Predict emotion for 'giao hàng quá chậm':  negative



In [None]:
prediction_accuracy_graph

In [None]:
# 'bert-base-uncased' Bert Model with best F1 score macro

In [None]:
# Build BERT model.
model = build_Bert_model(pre_trained_model,attention_probs_dropout_prob,hidden_dropout_prob)

# Let PyTorch to use the currently available device to load a tensor into memory.
model.to(device)

# Find the path as best_model_path for the BERT model trained at the best epoch at which the model has highest F1 macro score.
epoch = best_epoch_F1_score_macro
best_model_path = f'emotion_{model_type}_NLP_{epoch}.model'

# Load the best trained BERT model.
model.load_state_dict(torch.load(best_model_path, map_location=torch.device('cpu')))

# Save the best trained BERT model.
epochs = best_epoch_F1_score_macro
model_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}.model'
torch.save(model.state_dict(),model_path )

# Append the path of best trained BERT model to best_model_path_list. 
best_model_path_list.append(model_path)

# Add a column as best_model_path in model_df to store best_model_path_list.
model_df['best_model_path'] = best_model_path_list

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [None]:
# Predict the emotion of the sample of customer reviews.
predict_emotion_test()

# Generate dataframe df_f1_scores_long as long form of dataset for the Best Trained BERT model performance metrics.
df_f1_scores_long = prepare_data_for_metrics_graphs(eval_df, best_model=True)

# Use dataloader_validation to generate predictions and true_vals. 
# Create dataframe accuracy_per_class_df to store BERT model prediction accuracy of individual class.
_, predictions, true_vals = evaluate(dataloader_validation)
accuracy_per_class_df = accuracy_per_class(predictions, true_vals)

# Save accuracy_per_class_df.
accuracy_per_class_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_accuracy_per_class_df.csv'
accuracy_per_class_df.to_csv(accuracy_per_class_df_path,index=False)

"""
Generate two graphs for BERT model evaluation metrics. 
1. F1_scores_graph: Trained BERT model F1 scores(macro, micro and weighted), train and validation losses. 
2. prediction_accuracy_graph: Trained BERT model prediction accuracy of individual classes.
"""

F1_scores_graph, prediction_accuracy_graph = prepare_metrics_graphs(df_f1_scores_long, 
                                                                    accuracy_per_class_df,
                                                                    domain, 
                                                                    width, 
                                                                    height, 
                                                                    epochs)
print("")   
F1_scores_graph

 
Predict emotion for 'Sản phẩm rất tốt, và mạnh mẽ':  positive
Predict emotion for 'Tôi cần ai đó hỗ trợ tôi cách sử dụng':  neutral
Predict emotion for 'giao hàng quá chậm':  neutral
Prediction accuracy for individual class:
Class: negative
Accuracy: 104/172,  0.605 

Class: neutral
Accuracy: 2029/2128,  0.953 

Class: positive
Accuracy: 2164/2255,  0.960 




In [None]:
prediction_accuracy_graph

In [None]:
# 'NlpHUST/vibert4news-base-cased' BERT model with best F1 score macro
# Used VnEmoLex, which was validated by Suwasit's Vietnamese team, to train pre_trained_model.

In [None]:
 # Create dictionary for class labels

label_dict = {'positive': 2, 'neutral': 1, 'negative': 0}
label_dict_inverse = {v: k for k, v in label_dict.items()}

VnEmoLex_validated_df = pd.read_excel(f'drive/MyDrive/Realtime Dreamer/VnEmoLex_Validate_V1.xlsx',
                                      engine='openpyxl',
                                      sheet_name ='GS_VN _Dictionary', 
                                      skiprows=1)
emotion_label_list = []
for i in range(len(VnEmoLex_validated_df)):
    if (VnEmoLex_validated_df['Positive'][i] == 1) & (VnEmoLex_validated_df['Negative'][i] == 0):
        emotion_label_list.append(2) 
    elif (VnEmoLex_validated_df['Positive'][i] == 0) & (VnEmoLex_validated_df['Negative'][i] == 1):
        emotion_label_list.append(0) 
    else: emotion_label_list.append(1) 

VnEmoLex_validated_df['label'] = emotion_label_list
VnEmoLex_validated_df['emotion'] = VnEmoLex_validated_df['label'].replace(label_dict_inverse)
VnEmoLex_validated_df1 = VnEmoLex_validated_df[['Vietnamese', 'emotion','label']].reset_index()
print(VnEmoLex_validated_df1.shape)
VnEmoLex_validated_df1.head()


(10622, 4)


Unnamed: 0,index,Vietnamese,emotion,label
0,0,tháng này,neutral,1
1,1,môn phái,neutral,1
2,2,3D,neutral,1
3,3,a xít,negative,0
4,4,ác,negative,0


In [None]:
# Create reviews label dateset
df = set_data_category_in_df(VnEmoLex_validated_df1) 
print("\ndf['label'].value_counts():")
print(df['label'].value_counts())

# Set BERT model parameters and BERT model evaluation dataframe value.
hidden_dropout_prob = 0.1
attention_probs_dropout_prob = 0.1
pre_trained_model = 'NlpHUST/vibert4news-base-cased'
model_type = pre_trained_model.split('/')[0]
batch_size = 16
epochs = 10
Ir = 1e-5
eps = 1e-8
train_data_provider = 'Suwasit'
train_data_type = 'VnEmoLex Validate-Viet Team'
train_data_type_list.append(train_data_type)

# Create tokenizer
tokenizer = BertTokenizer.from_pretrained(pre_trained_model, do_lower_case=True)

# Build train and validation dataloader
dataloader_train, dataloader_validation = build_dataloader(df, batch_size)

# Build and train BERT model. Generate BERT model and dataset eval_df for model evaluation metrics
model = build_Bert_model(pre_trained_model, attention_probs_dropout_prob, hidden_dropout_prob)
eval_df, model = train_model(Ir, eps, epochs, batch_size, dataloader_validation)

# Save eval_df for building visualization later on
eval_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_NLP_Epoch{epochs}_train_data_provided_by_{train_data_provider}_eval_VnEmoLex_validated_df.csv'
eval_df.to_csv(eval_df_path, index=False)

# Use dataloader_validation to generate predictions and true_vals. 
# Create dataframe accuracy_per_class_df to store BERT model prediction accuracy of individual class.
_, predictions, true_vals = evaluate(dataloader_validation)
accuracy_per_class_df = accuracy_per_class(predictions, true_vals)
accuracy_per_class_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_accuracy_per_class_VnEmoLex_validated_df.csv'
accuracy_per_class_df.to_csv(accuracy_per_class_df_path,index=False)

# Save trained BERT model.
model_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_VnEmoLex_validated.model'
torch.save(model.state_dict(),model_path )

# Calculate best epoch for the highest F1 macro score of the trained BERT model.
best_epoch_F1_score_macro = eval_df[ eval_df['F1_score_macro']==max(eval_df['F1_score_macro'])]['epoch'].values[0]
print('best epoch for the best F1 score (macro): ',best_epoch_F1_score_macro)

# Append eval_df path to eval_df_path_list, accuracy_per_class_df path to accuracy_per_class_path_list 
# trained BERT model path to model_path_list, and best epoch for the highest F1 macro socre to best_epoch_F1_score_macro_list.
eval_df_path_list.append(eval_df_path)
accuracy_per_class_path_list.append(accuracy_per_class_df_path)
model_path_list.append(model_path)
best_epoch_F1_score_macro_list.append(best_epoch_F1_score_macro)


df['label'].value_counts():
1    4673
0    3149
2    2800
Name: label, dtype: int64


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'RobertaTokenizer'. 
The class this function is called from is 'BertTokenizer'.
Some weights of the model checkpoint at NlpHUST/vibert4news-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are ini

cuda


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/532 [00:00<?, ?it/s]


Epoch 1
Training loss: 1.0819888671762066
Validation loss: 1.0662315595418888
F1 Score (Weighted): 0.3855637457212696
F1 Score (macro): 0.3523098995494252
F1 Score (micro): 0.4268235294117647


Epoch 2:   0%|          | 0/532 [00:00<?, ?it/s]


Epoch 2
Training loss: 1.0241939902753758
Validation loss: 1.0004924056225253
F1 Score (Weighted): 0.43526337983530977
F1 Score (macro): 0.3974910495437977
F1 Score (micro): 0.49223529411764705


Epoch 3:   0%|          | 0/532 [00:00<?, ?it/s]


Epoch 3
Training loss: 0.9309987917654496
Validation loss: 0.9852895833047709
F1 Score (Weighted): 0.5098568221734615
F1 Score (macro): 0.5108996780213837
F1 Score (micro): 0.5110588235294118


Epoch 4:   0%|          | 0/532 [00:00<?, ?it/s]


Epoch 4
Training loss: 0.8542203005207213
Validation loss: 0.9701720041439945
F1 Score (Weighted): 0.5400304027257107
F1 Score (macro): 0.5315443324175912
F1 Score (micro): 0.5392941176470588


Epoch 5:   0%|          | 0/532 [00:00<?, ?it/s]


Epoch 5
Training loss: 0.7766379962178102
Validation loss: 0.9976235025359276
F1 Score (Weighted): 0.557606673101395
F1 Score (macro): 0.5455467319113311
F1 Score (micro): 0.5595294117647058


Epoch 6:   0%|          | 0/532 [00:00<?, ?it/s]


Epoch 6
Training loss: 0.7061088198333755
Validation loss: 1.0381318415914262
F1 Score (Weighted): 0.5495512404142789
F1 Score (macro): 0.5354789332270299
F1 Score (micro): 0.5552941176470588


Epoch 7:   0%|          | 0/532 [00:00<?, ?it/s]


Epoch 7
Training loss: 0.6513511541866719
Validation loss: 1.0447741354766644
F1 Score (Weighted): 0.5662755374330085
F1 Score (macro): 0.5548104473007501
F1 Score (micro): 0.5694117647058824


Epoch 8:   0%|          | 0/532 [00:00<?, ?it/s]


Epoch 8
Training loss: 0.5985654457507277
Validation loss: 1.093193575403744
F1 Score (Weighted): 0.5549170989333843
F1 Score (macro): 0.5419046349773876
F1 Score (micro): 0.5581176470588235


Epoch 9:   0%|          | 0/532 [00:00<?, ?it/s]


Epoch 9
Training loss: 0.5549916433156433
Validation loss: 1.1477255816746474
F1 Score (Weighted): 0.5539860937467698
F1 Score (macro): 0.5417749871172602
F1 Score (micro): 0.5571764705882353


Epoch 10:   0%|          | 0/532 [00:00<?, ?it/s]


Epoch 10
Training loss: 0.5286805047688627
Validation loss: 1.1617781286849116
F1 Score (Weighted): 0.5528946749474734
F1 Score (macro): 0.5418634799177057
F1 Score (micro): 0.5548235294117647
Prediction accuracy for individual class:
Class: negative
Accuracy: 311/630,  0.494 

Class: neutral
Accuracy: 595/935,  0.636 

Class: positive
Accuracy: 273/560,  0.487 

best epoch for the best F1 score (macro):  7


In [None]:
# Create dataframe model_df with trained BERT model performance metrics.
model_df = prepare_model_df(eval_df_path_list, accuracy_per_class_path_list, model_path_list)

# Set width, height, domain for BERT model evaluation graph.
width, height, domain = 200, 200, (0,1)

# Generate dataframe df_f1_scores_long which is a long form of dataset for BERT model F1 scores(macro, micro and weighted), train and validation losses.
df_f1_scores_long = prepare_data_for_metrics_graphs(eval_df, best_model=False)

# Predict emotions for sample customer reviews.
predict_emotion_test()

"""
Generate two graphs for BERT model evaluation metrics. 
1. F1_scores_graph: Trained BERT model F1 scores(macro, micro and weighted), train and validation losses. 
2. prediction_accuracy_graph: Trained BERT model prediction accuracy of individual classes.
"""


F1_scores_graph, prediction_accuracy_graph = prepare_metrics_graphs(df_f1_scores_long, 
                                                                    accuracy_per_class_df,
                                                                    domain, 
                                                                    width, 
                                                                    height, 
                                                                    epochs)
print("")   
F1_scores_graph

 
Predict emotion for 'Sản phẩm rất tốt, và mạnh mẽ':  negative
Predict emotion for 'Tôi cần ai đó hỗ trợ tôi cách sử dụng':  negative
Predict emotion for 'giao hàng quá chậm':  neutral



In [None]:
prediction_accuracy_graph

In [None]:
# 'NlpHUST/vibert4news-base-cased' BERT model with best F1 score macro
# Used VnEmoLex, which was validated by Suwasit's Vietnamese team, to train pre_trained_model.

In [None]:
# Build BERT model.
model = build_Bert_model(pre_trained_model,attention_probs_dropout_prob,hidden_dropout_prob)

# Let PyTorch to use the currently available device to load a tensor into memory.
model.to(device)

# Find the path as best_model_path for the BERT model trained at the best epoch at which the model has highest F1 macro score.
epoch = best_epoch_F1_score_macro
best_model_path = f'emotion_{model_type}_NLP_{epoch}.model'

# Load the best trained BERT model.
model.load_state_dict(torch.load(best_model_path, map_location=torch.device('cpu')))

# Save the best trained BERT model.
epochs = best_epoch_F1_score_macro
model_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_VnEmoLex_validated.model'
torch.save(model.state_dict(),model_path )

# Append the path of best trained BERT model to best_model_path_list. 
best_model_path_list.append(model_path)

# Add a column as best_model_path in model_df to store best_model_path_list.
model_df['best_model_path'] = best_model_path_list

Some weights of the model checkpoint at NlpHUST/vibert4news-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

In [None]:
# Predict the emotion of the sample of customer reviews.
predict_emotion_test()

# Generate dataframe df_f1_scores_long as long form of dataset for the Best Trained BERT model performance metrics.
df_f1_scores_long = prepare_data_for_metrics_graphs(eval_df, best_model=True)

# Use dataloader_validation to generate predictions and true_vals. 
# Create dataframe accuracy_per_class_df to store BERT model prediction accuracy of individual class.
_, predictions, true_vals = evaluate(dataloader_validation)
accuracy_per_class_df = accuracy_per_class(predictions, true_vals)

# Save accuracy_per_class_df.
accuracy_per_class_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_accuracy_per_class_VnEmoLex_validated_df.csv'
accuracy_per_class_df.to_csv(accuracy_per_class_df_path,index=False)

"""
Generate two graphs for BERT model evaluation metrics. 
1. F1_scores_graph: Trained BERT model F1 scores(macro, micro and weighted), train and validation losses. 
2. prediction_accuracy_graph: Trained BERT model prediction accuracy of individual classes.
"""

F1_scores_graph, prediction_accuracy_graph = prepare_metrics_graphs(df_f1_scores_long, 
                                                                    accuracy_per_class_df,
                                                                    domain, 
                                                                    width, 
                                                                    height, 
                                                                    epochs)
print("")   
F1_scores_graph

 
Predict emotion for 'Sản phẩm rất tốt, và mạnh mẽ':  negative
Predict emotion for 'Tôi cần ai đó hỗ trợ tôi cách sử dụng':  negative
Predict emotion for 'giao hàng quá chậm':  neutral
Prediction accuracy for individual class:
Class: negative
Accuracy: 338/630,  0.537 

Class: neutral
Accuracy: 617/935,  0.660 

Class: positive
Accuracy: 255/560,  0.455 




In [None]:
prediction_accuracy_graph

In [None]:
model_df.to_csv(f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_model_df.csv') 

In [None]:
width=120
height=110
domain=(0, 1.2)

def draw_graph(df, domain, table_id, width,height, first_line_title, 
               second_line_title, third_line_title):
  
    """
    Create draw_graph function to plot graph for F1 scores(macro, micro and weighted), 
    train and validation losses of trained BERT model.
    """


    df=df[df['table_id']==table_id]

    base = alt.Chart(df).mark_line().encode(
        x='epoch'
    ).properties(
        width=width,
        height=height,
        title={
          "text": [first_line_title,second_line_title,third_line_title], 
          "color": "black",
          "subtitleColor": "black"
        }
    )

    graph=base.mark_line().encode(
        y=alt.Y("value:Q",title="",scale=alt.Scale(domain=domain)),
        color=alt.Color('metrics_type:N',
                        legend=alt.Legend(
                        orient='bottom',
                        titleFontSize=11,
                        titleColor='black',
                        labelFontSize=10.5,
                        labelColor='black',
                        direction='horizontal'
                        )
                       ),
        tooltip=['emotion','epoch','metrics_type','value']
        ).interactive()

    return graph

def combined_graph(df,width,height):

    """
    Create combined_graph function to combine all the graphs generated 
    by function draw_graph for trained BERT model metrics.
    """


    train_data_provider_dict={}
    train_data_type_dict={}
    model_type_dict={}
    emotion_dict={}
   
    for i in range(1,len(df['table_id'].unique())+1):
        train_data_provider_dict[i] = df[df['table_id']==i]['train_data_creator'].unique()[0].split("'")[0]
        train_data_type_dict[i] = df[df['table_id']==i]['train_data_type'].unique()[0].split("'")[0]
        model_type_dict[i] = df[df['table_id']==i]['pre_trained_model'].unique()[0].split('/')[0]
        emotion_dict[i] = df[df['table_id']==i]['emotion'].unique()[0]
        graph_temp = draw_graph(df, domain, i, width, height, model_type_dict[i],
                                emotion_dict[i], train_data_type_dict[i] )
        if i == 1:
            graph = graph_temp
        elif (i > 1) and (i <5): 
            graph = graph | graph_temp
            graph1 = graph
        elif i == 5:
            graph2 = graph_temp
        elif i > 5:
            graph2 = graph2 | graph_temp
     
    title = alt.Chart({"values": [{"text": "Sentiment Analysis Model Evaluation by Pre-trained BERT Model"}]}
                      ).mark_text(
                        size=15,dx=330,dy=0,color="black"
                      ).encode(
                      text='text:N'
                      ).properties(
                      width=width,height=100
                      )

    chart = (title & graph1 &   graph2
            ).configure_view(
                stroke=None
            ).configure_concat(
                spacing=20
            ).configure_title(
                fontSize=10.5
            )

    return chart


In [None]:
def prepare_metrics_df(model_df, colname):

    """
    Create prepare_metrics_df function to output dataframe df_metrics with trained BERT model metrics.
    """


    df_metrics = pd.DataFrame()

    for i in range(len(model_df)):  
        df_metrics_temp = pd.read_csv(list(model_df[colname])[i], encoding='ISO-8859-1')
        df_metrics_temp['table_id'] = i+1
        df_metrics = pd.concat([df_metrics, df_metrics_temp], axis =0 )
    return df_metrics

# Import dataset for lists of paths of the files which have model evaluation metrics of the trained models. 
model_df = pd.read_csv(f'/content/drive/MyDrive/Realtime Dreamer/model_info.csv', encoding='ISO-8859-1')

# Generate df_metrics dataset with model evaluation metrics of the trained models.
df_metrics = prepare_metrics_df(model_df, 'eval_df_path')

# Save df_metrics.
df_metrics.to_csv(f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_df_metrics.csv', index=False)

In [None]:
# Generated df_metrics_long as long format of df_metrics dataframe.
df_metrics_long = pd.melt(df_metrics, 
                          id_vars=['emotion', 'epoch','batch_size', 'Ir', 'eps', 'train_data_type', 'note', 'table_id',
                                   'pre_trained_model', 'hidden_dropout_prob', 'attention_probs_dropout_prob','train_data_creator'], 
                          value_vars=['train_loss', 'val_loss', 'F1_score_weighted', 'F1_score_macro', 'F1_score_micro'],
                          var_name=['metrics_type'])

In [None]:
# Save df_metrics_long.
df_metrics_long.to_csv(f'/content/drive/MyDrive/Realtime Dreamer/df_metrics_long.csv', index=False)

In [None]:
# Plot graphs for BERT model evaluation metrics: F1 scores(macro, micro, weighted), train and validation losses
combined_graph(df_metrics_long, 180, 110)

In [None]:
""""
Prepare df_metrics_long_gb dataset with max F1 score macro for each of above Trained BERT models
in order to sort the graphs by max F1 score macro
"""


df_metrics_long_gb = df_metrics_long[df_metrics_long['metrics_type']=='F1_score_macro'].groupby(['table_id',
                                                                                              'pre_trained_model',
                                                                                              'metrics_type',
                                                                                              'train_data_creator',
                                                                                              'train_data_type'])['value'].max()
df_metrics_long_gb = df_metrics_long_gb.dropna().reset_index()                     
df_metrics_long_gb['model type'] = [model_type[0] for model_type in df_metrics_long_gb['pre_trained_model'].str.split('/')]
df_metrics_long_gb.sort_values(by=['value','table_id'], ascending=[False,True], inplace=True)

tableid_modeltype_list = []

for id, type in zip(df_metrics_long_gb['table_id'], df_metrics_long_gb['model type']):
    tableid_modeltype_list.append((str(id) + '-' + type))
    
print(tableid_modeltype_list)

df_metrics_long_gb['table_id_model_type']=tableid_modeltype_list

sorted_table_id=list(df_metrics_long_gb['table_id'])
print(sorted_table_id)

df_metrics_long_gb['tableId_modelType']=tableid_modeltype_list

['1-trituenhantaoio', '2-NlpHUST', '3-bert-base-uncased', '4-trituenhantaoio', '6-NlpHUST', '5-trituenhantaoio', '7-bert-base-uncased', '8-NlpHUST']
[1, 2, 3, 4, 6, 5, 7, 8]


In [None]:
def prepare_metrics(df, df_long_gb, metric_col1, metric_col2):

    """
    Create prepare_metrics function to output metrics_list for BERT model performance metrics
    """


    metrics_list = []
    for ind,tbl_id in enumerate(sorted_table_id):
        metrics_list.append(df[(df['table_id']==tbl_id) & (df[metric_col1]==df_long_gb['value'].values[ind])][metric_col2].values[0])
    return metrics_list

F1_score_weighted_list = prepare_metrics(df_metrics, df_metrics_long_gb, 'F1_score_macro','F1_score_weighted')
F1_score_micro_list = prepare_metrics(df_metrics, df_metrics_long_gb,'F1_score_macro','F1_score_micro')
train_loss_list = prepare_metrics(df_metrics, df_metrics_long_gb,'F1_score_macro','train_loss')
val_loss_list = prepare_metrics(df_metrics, df_metrics_long_gb,'F1_score_macro','val_loss')

df_metrics_long_gb['F1_score_macro'] = df_metrics_long_gb['value']
#df_metrics_long_gb.drop(['metrics_type'], axis=1, inplace=True)
df_metrics_long_gb['F1_score_micro'] = F1_score_micro_list
df_metrics_long_gb['F1_score_weighted'] = F1_score_weighted_list
df_metrics_long_gb['train_loss'] = train_loss_list
df_metrics_long_gb['val_loss'] = val_loss_list

In [None]:
def plot_metrics(df,score_column):
    
    """
    Create plot_metrics function to plot graph of trained BERT model metrics
    Output graph
    """


    graph = alt.Chart(df).mark_bar(size=10).encode(
        x = alt.X('tableId_modelType:N', sort=sorted_table_id),
        y = alt.Y(score_column, title='',scale=alt.Scale(domain=(0,1))),
        color = alt.Color('train_data_type:N',
                          scale=alt.Scale(scheme='redyellowgreen'),
                          legend=alt.Legend(orient='bottom',
                                            titleFontSize=11,
                                            titleColor='black',
                                            labelFontSize=10.5,
                                            labelColor='black',
                                            direction='horizontal')),
        tooltip = ['train_data_creator',
                   'pre_trained_model',
                   score_column]
        ).interactive(
        ).properties(width=140,
                     height=150,
                     title=score_column)
    return graph

def combined_metrics_graph(df_metrics_long_gb):

    """
    Create combined_metrics_graph function to combine graphs for trained BERT models.
    Output combined graph.
    """


    title = alt.Chart({"values": [{"text": "Sentiment Analysis Model Evaluation by Metrics"}]}
                      ).mark_text(size=15, dx=350, dy=0, color="black"
                      ).encode(text='text:N'
                      ).properties(width=110,height=150)

    chart = plot_metrics(df_metrics_long_gb, 'F1_score_macro') | plot_metrics(df_metrics_long_gb,'F1_score_weighted')| \
            plot_metrics(df_metrics_long_gb, 'F1_score_micro') | plot_metrics(df_metrics_long_gb,'val_loss')| \
            plot_metrics(df_metrics_long_gb,'train_loss') 

    chart1 = (title & chart
              ).configure_view(stroke=None
              ).configure_concat(spacing=15
              ).configure_title(fontSize=12)

    return chart1

combined_metrics_graph(df_metrics_long_gb)

In [None]:
def plot_metric_scores(df, col1, col2, metric, title):

    """
    Create plot_metric_scores function to plot prediction accuracy scores
    of the trained BERT models.
    """


    tableid_modeltype_list = []
    
    for id, type in zip(df['table_id'], df['pre_trained_model']):
        tableid_modeltype_list.append((str(id) +'-' + type))

    df['tableId_modelType'] = tableid_modeltype_list
    sorted_table_id2 = list(df[df[col1]==metric].sort_values(by=[col2,'table_id'],
                                                                 ascending=[False,True])['tableId_modelType'])
    
    df= df.sort_values(by=[col1, col2],ascending=[True,False])

    title = alt.Chart({"values": [{"text": title}]}
                      ).mark_text(size=15, dx=240, dy=0, color="black"
                      ).encode(text='text:N'
                      ).properties(width=110,height=30)

    graph = alt.Chart(acc_per_class_combined_df).mark_bar(size=10).encode(
            x = alt.X('tableId_modelType:N',title=""),
            y = alt.Y(col2 + ':Q'),  
            color = alt.Color('train_data_type:N', scale=alt.Scale(scheme='redyellowgreen'),
                              legend=alt.Legend(
                              orient='bottom',
                              titleFontSize=11,
                              titleColor='black',
                              labelFontSize=10.5,
                              labelColor='black',
                              direction='horizontal')),
            column=alt.Column(col1 + ':N', title="", header=alt.Header(labelFontSize=13), sort=sorted_table_id2),
            tooltip=[col2,'train_data_creator','epoch','pre_trained_model']
      
        ).properties(width=185,height=100)
    chart = (title & graph
            ).configure_view(stroke=None
            ).configure_concat(spacing=15
            ).configure_title(fontSize=30
            ).configure_axis(labelFontSize = 12,
                             titleFontSize = 12
            )
    return chart

# Import dataset for prediction accuracy of individual classes of trained BERT model
acc_per_class_combined_df = prepare_metrics_df(model_df, 'accuracy_per_class_path')

# Plot prediction accuracy scores of the trained BERT models.
title = "Sentiment Analysis Model Evaluation by Individual Class Prediction Accuracy"
plot_metric_scores(acc_per_class_combined_df, 'class', 'score', 'negative', title)