<a href="https://colab.research.google.com/github/yunhonghe/COVID-19-Pandemic/blob/main/Sentiment_Analysis_BERT_Model_Evaluation_Aug_13_2022.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive at /content/drive. Later cells read the review workbooks
# from "drive/MyDrive/Realtime Dreamer/" and write CSVs and model checkpoints
# to "/content/drive/MyDrive/Realtime Dreamer/".
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
# Install required modules (versions are not pinned).
# The captured output of this cell shows transformers 4.21.1 being installed.
# NOTE(review): the import cell pulls `AdamW` from `transformers`; confirm that
# symbol still exists before running against a newer transformers release.
!pip install transformers
!pip install torch
!pip install openpyxl
!pip3 install tqdm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.21.1-py3-none-any.whl (4.7 MB)
[K     |████████████████████████████████| 4.7 MB 7.9 MB/s 
Collecting pyyaml>=5.1
  Downloading PyYAML-6.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl (596 kB)
[K     |████████████████████████████████| 596 kB 52.2 MB/s 
Collecting huggingface-hub<1.0,>=0.1.0
  Downloading huggingface_hub-0.8.1-py3-none-any.whl (101 kB)
[K     |████████████████████████████████| 101 kB 12.1 MB/s 
Collecting tokenizers!=0.11.3,<0.13,>=0.11.1
  Downloading tokenizers-0.12.1-cp37-cp37m-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (6.6 MB)
[K     |████████████████████████████████| 6.6 MB 51.1 MB/s 
Installing collected packages: pyyaml, tokenizers, huggingface-hub, transformers
  Attempting uninstall: pyyaml
    Found existing installation: PyYAML 3.13
    Uninstalling P

In [None]:
# Show the GPU(s) attached to this runtime and their current memory usage
!nvidia-smi

Fri Aug 12 12:55:02 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   38C    P0    26W / 250W |      0MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
# Import libraries
import pandas as pd
pd.options.mode.chained_assignment = None 
import numpy as np
import random
from sklearn.metrics import f1_score
import torch
from torch.utils.data import DataLoader, TensorDataset, RandomSampler, SequentialSampler
from sklearn.model_selection import train_test_split
from transformers import BertForSequenceClassification,BertTokenizer,BertModel,AutoModel, AutoTokenizer,AdamW, get_linear_schedule_with_warmup
from tqdm.notebook import tqdm
import altair as alt
alt.renderers.enable('default')
import warnings
warnings.filterwarnings('ignore')
no_deprecation_warning=True

In [None]:
# Constants: MAX_LEN and the seed shared by every random number generator.
MAX_LEN, RANDOM_SEED = 768, 42

# Seed Python, NumPy and PyTorch (CPU, then all CUDA devices) in that order.
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)
torch.cuda.manual_seed_all(RANDOM_SEED)

# Pick the first GPU when CUDA is available, otherwise the CPU, and display it.
_device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
device

device(type='cuda', index=0)

In [None]:
# Read the labelled reviews (sheet "train") and rename the text column to 'Vietnamese'.
train_review_df = pd.read_excel(f'drive/MyDrive/Realtime Dreamer/train reviews.xlsx',
                                engine='openpyxl',
                                sheet_name ='train',
                                skiprows=0)
train_review_df = train_review_df.rename(columns={'Review Content':'Vietnamese'})
print(train_review_df.shape)
# Keep an untouched copy of the data as it was before oversampling.
train_review_df_before_oversample = train_review_df.copy()

print('Count of individual class before oversamping negative class:')
print(train_review_df['emotion'].value_counts())

# The classes are imbalanced. Oversample the negative class (sampling with
# replacement) until it has as many rows as the positive class.
# The neutral class is left as-is.
k_neg = len(train_review_df[train_review_df['emotion']=='positive'])\
        -len(train_review_df[train_review_df['emotion']=='negative'])

# Draw k_neg values of the 'index' column from the negative rows; uses the
# `random` module seeded earlier, so the draw is reproducible.
new_index_neg = random.choices(train_review_df[train_review_df['emotion']=='negative']['index'].values,
                             k=k_neg)
df_add_neg = pd.DataFrame(new_index_neg,columns=['index'])

# Look up the full row for every sampled 'index' value.
df_add_neg_combined = pd.merge(df_add_neg,train_review_df,
                             how='left',
                             on=['index'])

# Stack the sampled rows under the original ones. `DataFrame.append` was removed
# in pandas 2.0, so use `pd.concat`; `drop=True` discards the old row labels
# directly instead of materialising and then dropping a 'level_0' column.
# NOTE(review): oversampling happens before the train/validation split, so
# copies of the same negative review can end up in both splits and make the
# validation scores for that class optimistic.
train_review_df = pd.concat([train_review_df, df_add_neg_combined]).reset_index(drop=True)
print('\nCount of individual class after oversamping negative class:')
print(train_review_df['emotion'].value_counts())

(5703, 4)
Count of individual class before oversamping negative class:
positive    5270
neutral      269
negative     164
Name: emotion, dtype: int64

Count of individual class after oversamping negative class:
positive    5270
negative    5270
neutral      269
Name: emotion, dtype: int64


In [None]:
 # Create dictionary for class labels
# Emotion name -> integer class id, plus the reverse lookup used when decoding predictions.
label_dict = dict(positive=2, neutral=1, negative=0)
label_dict_inverse = dict(zip(label_dict.values(), label_dict.keys()))


def add_label_to_df(df):
    """
    Attach the numeric class id to each review.

    Writes a 'label' column into `df` itself (callers rely on this in-place
    effect), obtained by replacing every 'emotion' value via `label_dict`.
    Returns a frame restricted to the columns
    'index', 'Vietnamese', 'emotion' and 'label'.
    """
    # In-place on purpose: the caller's frame gains the 'label' column too.
    df['label'] = df['emotion'].replace(label_dict)
    kept_columns = ['index','Vietnamese','emotion','label']
    return df[kept_columns]


def data_split(df):
    """
    Split the labelled reviews 80/20 into train and validation parts.

    The split is stratified on the class label to cope with class imbalance
    and is seeded with RANDOM_SEED. What is split are the row labels
    (`df.index`), not the rows themselves.

    Returns (X_train, X_val, y_train, y_val): row labels and class ids of
    each part.
    """
    labelled = add_label_to_df(df)
    targets = labelled['label'].values
    X_train, X_val, y_train, y_val = train_test_split(labelled.index.values,
                                                      targets,
                                                      test_size=0.20,
                                                      random_state=RANDOM_SEED,
                                                      stratify=targets)
    return X_train, X_val, y_train, y_val


def set_data_category_in_df(df):
    """
    Tag every row of `df` as 'train' or 'val' in a new 'data_category' column.

    The frame is modified in place and also returned. Rows start as 'unset'
    and are then overwritten according to the split produced by `data_split`.
    """
    train_rows, val_rows, _, _ = data_split(df)
    df['data_category'] = 'unset'
    for category, row_labels in (('train', train_rows), ('val', val_rows)):
        df.loc[row_labels, 'data_category'] = category
    return df

In [None]:
def encode_data_and_prepare_dataset(df):
    """
    Tokenize the train and validation reviews and wrap them as datasets.

    Uses the module-level `tokenizer`. Each split is encoded separately with
    padding to the longest sequence of that split. Every resulting
    TensorDataset holds (input_ids, attention_mask, labels).

    Returns (dataset_train, dataset_val).
    """
    df = set_data_category_in_df(df)

    def _dataset_for(category):
        # Rows of one split -> encoded tensors + label tensor.
        subset = df[df.data_category == category]
        encoded = tokenizer.batch_encode_plus(subset.Vietnamese.values,
                                              add_special_tokens=True,
                                              return_attention_mask=True,
                                              padding=True,
                                              return_tensors='pt')
        labels = torch.tensor(subset['label'].values)
        return TensorDataset(encoded['input_ids'], encoded['attention_mask'], labels)

    dataset_train = _dataset_for('train')
    dataset_val = _dataset_for('val')
    return dataset_train, dataset_val

In [None]:
def build_Bert_model(pre_trained_model,attention_probs_dropout_prob,hidden_dropout_prob):
    """
    Load a pretrained BERT checkpoint with a sequence-classification head.

    The head has one output per entry of `label_dict`; attention maps and
    hidden states are not returned by the model. The two dropout
    probabilities are forwarded to the model configuration.
    """
    model_options = dict(num_labels=len(label_dict),
                         output_attentions=False,
                         output_hidden_states=False,
                         hidden_dropout_prob=hidden_dropout_prob,
                         attention_probs_dropout_prob=attention_probs_dropout_prob)
    return BertForSequenceClassification.from_pretrained(pre_trained_model, **model_options)


def build_dataloader(df,batch_size):
    """
    Create the train and validation DataLoaders for `df`.

    Training batches are drawn in random order, validation batches in
    sequential order; both use `batch_size`.

    Returns (dataloader_train, dataloader_validation).
    """
    train_set, val_set = encode_data_and_prepare_dataset(df)
    train_loader = DataLoader(train_set,
                              batch_size=batch_size,
                              sampler=RandomSampler(train_set))
    val_loader = DataLoader(val_set,
                            batch_size=batch_size,
                            sampler=SequentialSampler(val_set))
    return train_loader, val_loader


def setup_optimizer(Ir,eps):
    """
    Return an AdamW optimizer over the parameters of the module-level `model`.

    Ir  -- learning rate (passed as `lr`).
    eps -- AdamW epsilon.
    """
    trainable = model.parameters()
    return AdamW(trainable, lr=Ir, eps=eps)


def setup_scheduler(optimizer,Ir,eps,epochs):
    """
    Return a linear learning-rate schedule without warmup for `optimizer`.

    The total number of steps is the number of batches in the module-level
    `dataloader_train` times `epochs`. `Ir` and `eps` are accepted but not used.
    """
    total_steps = len(dataloader_train) * epochs
    return get_linear_schedule_with_warmup(optimizer,
                                           num_warmup_steps=0,
                                           num_training_steps=total_steps)

In [None]:
def evaluate(dataloader_validation):
    """
    Run the module-level `model` over a dataloader without updating it.

    Puts the model in eval mode, feeds every batch (moved to the module-level
    `device`) through it and collects the outputs.

    Returns (loss_val_avg, predictions, true_vals): the mean of the per-batch
    losses, the logits of all samples stacked along axis 0, and the matching
    true class ids.
    """
    model.eval()

    summed_loss = 0
    logits_per_batch = []
    labels_per_batch = []

    # No gradients are needed anywhere in the evaluation loop.
    with torch.no_grad():
        for raw_batch in dataloader_validation:
            on_device = tuple(tensor.to(device) for tensor in raw_batch)
            inputs = dict(input_ids=on_device[0],
                          attention_mask=on_device[1],
                          labels=on_device[2])

            outputs = model(**inputs)
            batch_loss, batch_logits = outputs[0], outputs[1]

            summed_loss += batch_loss.item()
            logits_per_batch.append(batch_logits.detach().cpu().numpy())
            labels_per_batch.append(inputs['labels'].cpu().numpy())

    loss_val_avg = summed_loss/len(dataloader_validation)
    predictions = np.concatenate(logits_per_batch, axis=0)
    true_vals = np.concatenate(labels_per_batch, axis=0)

    return loss_val_avg, predictions, true_vals

In [None]:
def train_model(Ir,eps,epochs,batch_size,dataloader_validation):
    """
    Fine-tune the module-level BERT `model` and collect per-epoch metrics.

    Parameters
    ----------
    Ir : float
        Learning rate handed to the optimizer.
    eps : float
        AdamW epsilon.
    epochs : int
        Number of passes over the module-level `dataloader_train`.
    batch_size : int
        Only recorded in the returned metrics frame; the dataloaders are
        built elsewhere.
    dataloader_validation : DataLoader
        Data used to compute validation loss and F1 scores after each epoch.

    Returns
    -------
    (eval_df, model)
        `eval_df` has one row per epoch with train/validation loss, the
        weighted/macro/micro F1 scores and the run settings. `model` is the
        module-level model after the last epoch.

    Notes
    -----
    Reads these module-level names: `model`, `dataloader_train`, `model_type`,
    `pre_trained_model`, `hidden_dropout_prob`, `attention_probs_dropout_prob`,
    `train_data_provider`. After every epoch the weights are written to
    `emotion_{model_type}_NLP_{epoch}.model` in the working directory.
    """
    epoch_list=[]
    train_loss=[]
    validation_loss=[]
    F1_score_weighted=[]
    F1_score_macro=[]
    F1_score_micro=[]

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.to(device)
    print(device)

    optimizer = setup_optimizer(Ir,eps)

    scheduler = setup_scheduler(optimizer,Ir,eps,epochs)

    for epoch in tqdm(range(1, epochs+1)):

        epoch_list.append(epoch)
        model.train()

        loss_train_total = 0

        progress_bar = tqdm(dataloader_train, desc='Epoch {:1d}'.format(epoch), leave=False, disable=False)

        for batch in progress_bar:

            model.zero_grad()

            batch = tuple(b.to(device) for b in batch)

            inputs = {'input_ids':      batch[0],
                      'attention_mask': batch[1],
                      'labels':         batch[2],
                    }

            outputs = model(**inputs)

            loss = outputs[0]
            loss_train_total += loss.item()
            loss.backward()

            # Clip the gradient norm to 1.0 before the optimizer step.
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

            optimizer.step()
            scheduler.step()

            # The loss is already averaged over the batch. The previous code
            # divided it by len(batch), which is the tuple size (3), not a
            # batch size, so the displayed value was a third of the real loss.
            progress_bar.set_postfix({'training_loss': '{:.3f}'.format(loss.item())})

        # Checkpoint after every epoch so the best epoch can be reloaded later.
        torch.save(model.state_dict(), f'emotion_{model_type}_NLP_{epoch}.model')

        tqdm.write(f'\nEpoch {epoch}')

        loss_train_avg = loss_train_total/len(dataloader_train)
        tqdm.write(f'Training loss: {loss_train_avg}')
        train_loss.append(loss_train_avg)

        val_loss, predictions, true_vals = evaluate(dataloader_validation)
        val_f1_weighted = f1_score_func(predictions, true_vals,'weighted')
        val_f1_macro = f1_score_func(predictions, true_vals,'macro')
        val_f1_micro = f1_score_func(predictions, true_vals,'micro')
        validation_loss.append(val_loss)
        F1_score_weighted.append(val_f1_weighted)
        F1_score_macro.append(val_f1_macro)
        F1_score_micro.append(val_f1_micro)

        tqdm.write(f'Validation loss: {val_loss}')
        tqdm.write(f'F1 Score (Weighted): {val_f1_weighted}')
        tqdm.write(f'F1 Score (macro): {val_f1_macro}')
        tqdm.write(f'F1 Score (micro): {val_f1_micro}')

    # Build the metrics frame once, after training. It used to be rebuilt
    # inside the loop on every epoch, and was undefined when `epochs` was 0.
    eval_df = pd.DataFrame()
    eval_df['emotion'] = ['positive,negative,neutral'] * len(epoch_list)
    eval_df['epoch'] = epoch_list
    eval_df['train_loss'] = train_loss
    eval_df['val_loss'] = validation_loss
    eval_df['F1_score_weighted'] = F1_score_weighted
    eval_df['F1_score_macro'] = F1_score_macro
    eval_df['F1_score_micro'] = F1_score_micro
    eval_df['batch_size'] = batch_size
    eval_df['Ir'] = Ir
    eval_df['eps'] = eps
    eval_df['pre_trained_model'] = pre_trained_model
    eval_df['hidden_dropout_prob'] = hidden_dropout_prob
    eval_df['attention_probs_dropout_prob'] = attention_probs_dropout_prob
    eval_df['note'] = 'Added and splited Self-judged Review emotions into train and valiation by 8:2'
    eval_df['train_data_creator'] = train_data_provider
    eval_df['train_data_type'] = np.where(eval_df['train_data_creator'] == 'Yunhong He',
                                          'Yunhong keyword + oversampling','Reviews-Suwasit Viet team' )
    # The original `return` was indented by two spaces, which matches no
    # enclosing block and made the whole cell fail with an IndentationError.
    return eval_df, model

In [None]:
# Accumulators filled by the training cells below, one entry per trained model:
# eval_df CSV paths, model paths, per-class accuracy CSV paths, the best epoch
# by macro F1, and best-checkpoint paths.
(eval_df_path_list,
 model_path_list,
 accuracy_per_class_path_list,
 best_epoch_F1_score_macro_list,
 best_model_path_list) = ([] for _ in range(5))

In [None]:
def f1_score_func(preds, labels, average):
    """
    Compute the F1 score of a batch of logits against the true class ids.

    preds   -- 2-D array of scores; the predicted class is the argmax of each row.
    labels  -- array of true class ids.
    average -- averaging mode forwarded to sklearn's `f1_score`.
    """
    predicted_ids = np.argmax(preds, axis=1).flatten()
    true_ids = labels.flatten()
    return f1_score(true_ids, predicted_ids, average=average)


def accuracy_per_class(preds, labels):
    """
    Print and tabulate how often each true class was predicted correctly.

    preds  -- 2-D array of scores; the predicted class is the argmax of each row.
    labels -- array of true class ids.

    Returns a DataFrame with one row per class present in `labels` and the
    columns 'class', 'score', 'pre_trained_model', 'epoch' and
    'train_data_creator'. The last three are filled from the module-level
    `model_type`, `epochs` and `train_data_provider`.
    """
    predicted_ids = np.argmax(preds, axis=1).flatten()
    true_ids = labels.flatten()

    class_names, scores = [], []

    print("Prediction accuracy for individual class:")
    for class_id in np.unique(true_ids):
        belongs = true_ids == class_id
        predicted_for_class = predicted_ids[belongs]
        n_total = len(true_ids[belongs])
        n_correct = len(predicted_for_class[predicted_for_class == class_id])
        acc = n_correct/n_total

        class_name = label_dict_inverse[class_id]
        class_names.append(class_name)
        scores.append(acc)

        print(f'Class: {class_name}')
        print(f'Accuracy: {n_correct}/{n_total}, ',\
              '{:.3f}'.format(acc),'\n')

    # One row per class, annotated with the settings of the current run.
    summary = pd.DataFrame(class_names,columns=['class'])
    return summary.assign(score=scores,
                          pre_trained_model=model_type,
                          epoch=epochs,
                          train_data_creator=train_data_provider)


def predict_text(input_text):
    """
    Predict the emotion of a single piece of text.

    Uses the module-level `tokenizer`, `model` and `device`. The text is
    lower-cased before tokenization.

    input_text -- the review text; non-string values are converted with str().

    Returns the emotion name looked up in `label_dict_inverse`.
    """
    # Make sure dropout is off: inference must be deterministic.
    model.eval()
    # Truncate to the model's position-embedding limit; a longer sequence
    # would make the forward pass fail instead of returning a prediction.
    inputs = tokenizer(str(input_text).lower(),
                       truncation=True,
                       max_length=model.config.max_position_embeddings,
                       return_tensors="pt").to(device)
    with torch.no_grad():
        logits = model(**inputs).logits
    # Single input, so the argmax over all logits is the predicted class id.
    predicted_class_id = logits.argmax().item()
    return label_dict_inverse[predicted_class_id]


def predict_emotion_test():
    """
    Smoke-test the current model on three hand-written Vietnamese reviews.

    Prints one line per review with the emotion returned by `predict_text`.
    """
    # (caption, review text) pairs; English meaning in the trailing comment.
    samples = (
        ("Predict emotion for 'Sản phẩm rất tốt, và mạnh mẽ': ",
         'Sản phẩm rất tốt, và mạnh mẽ'),             # Very good product, and powerful
        ("Predict emotion for 'Tôi cần ai đó hỗ trợ tôi cách sử dụng': ",
         'Tôi cần ai đó hỗ trợ tôi cách sử dụng'),    # I need someone to help me how to use it
        ("Predict emotion for 'giao hàng quá chậm': ",
         'giao hàng quá chậm'),                       # delivery is too slow
    )
    print(" ")
    for caption, review_text in samples:
        print(caption, predict_text(review_text))


def prepare_review_data():
    """
    Load the raw customer reviews from the Excel workbook.

    Returns (review_df, review_df_1):
      review_df   -- every row of sheet 'LZD R&R raw', with the row number
                     exposed as an 'index' column.
      review_df_1 -- only 'index', 'Review Content' and 'Rating', restricted
                     to rows where none of the three is null.
    The shape of the raw and of the filtered data is printed.
    """
    raw_reviews = pd.read_excel(f'drive/MyDrive/Realtime Dreamer/reviews.xlsx',
                                engine='openpyxl',
                                sheet_name ='LZD R&R raw')
    print("\nraw review dataset:",raw_reviews.shape)

    indexed_reviews = raw_reviews.reset_index()
    needed_columns = ['index','Review Content','Rating']
    complete_reviews = indexed_reviews[needed_columns].dropna(how='any')
    print("raw review dataset after dropping null value:", complete_reviews.shape)
    return indexed_reviews,complete_reviews


def generate_predition_data():
    """
    Predict an emotion for every complete customer review and save the result.

    Reviews without null values are scored with `predict_text`; the
    predictions are joined back onto the full review table by 'index' (rows
    that were not scored get no emotion). The merged table is written to
    review_emotion_prediction.csv on Google Drive and returned.
    """
    all_reviews, complete_reviews = prepare_review_data()
    complete_reviews['emotion'] = complete_reviews['Review Content'].apply(predict_text)

    predicted = complete_reviews[['index','emotion']]
    review_emotion_prediction = pd.merge(all_reviews, predicted, how='left', on='index')

    output_path = f'/content/drive/MyDrive/Realtime Dreamer/review_emotion_prediction.csv'
    review_emotion_prediction.to_csv(output_path, index=False)
    return review_emotion_prediction


def prepare_model_df(eval_df_path_list, accuracy_per_class_path_list, model_path_list):
    """
    Build and save the registry of trained models.

    One row per trained model with the paths of its eval_df CSV, its
    per-class accuracy CSV and its saved weights, plus the epoch with the
    highest macro F1 score. The best-epoch values are not a parameter: they
    come from the module-level `best_epoch_F1_score_macro_list`.

    The frame is written to model_info.csv on Google Drive and returned.
    """
    columns = (
        ('eval_df_path', eval_df_path_list),
        ('accuracy_per_class_path', accuracy_per_class_path_list),
        ('model_path', model_path_list),
        ('best_epoch_F1_score_macro', best_epoch_F1_score_macro_list),
    )
    model_df = pd.DataFrame()
    for column_name, values in columns:
        model_df[column_name] = values

    model_df.to_csv(f'/content/drive/MyDrive/Realtime Dreamer/model_info.csv', index=False)
    return model_df


def draw_F1_scores(df, domain, width, height, first_line_title, second_line_title,epochs):
    """
    Plot the metric curves (F1 scores and losses) against the epoch.

    df      -- long-form metrics with the columns 'epoch', 'value',
               'line type' and 'emotion'.
    domain  -- y-axis range.
    width, height -- chart size.
    first_line_title, second_line_title -- the two lines of the chart title.
    epochs  -- when exactly 1, points are drawn instead of lines.

    Returns an interactive Altair chart with one colour per 'line type'.
    """
    chart = alt.Chart(df)
    # A single epoch cannot form a line, so fall back to point marks.
    base = chart.mark_point() if epochs == 1 else chart.mark_line()

    metrics_legend = alt.Legend(
        title="Metrics",
        orient='right',
        titleFontSize=11,
        titleColor='black',
        labelFontSize=10.5,
        labelColor='black',
        direction='vertical')
    y_axis = alt.Y("value:Q", title="", scale=alt.Scale(domain=domain))
    line_colour = alt.Color('line type:N', legend=metrics_legend)
    chart_title = {"text": [first_line_title, second_line_title], "color": "black"}

    encoded = base.encode(
        x='epoch',
        y=y_axis,
        color=line_colour,
        tooltip=['emotion', 'epoch', 'line type', 'value'])
    graph = encoded.interactive().properties(
        width=width,
        height=height,
        title=chart_title)
    return graph


def draw_prediction_accuracy(df, domain, width, height, first_line_title, second_line_title):
    """
    Plot the prediction accuracy of each class as a bar chart.

    df      -- per-class accuracy with the columns 'class', 'score',
               'pre_trained_model', 'epoch' and 'train_data_creator'.
    domain  -- y-axis range.
    width, height -- chart size.
    first_line_title, second_line_title -- the two lines of the chart title.

    Returns an interactive Altair chart with one bar per class.
    """
    chart_title = {"text": [first_line_title,second_line_title], "color": "black"}
    class_axis = alt.X('class:N', axis=alt.Axis(labelAngle=360))

    base = alt.Chart(df).mark_bar().encode(x=class_axis).properties(
        width=width,
        height=height,
        title=chart_title)

    score_axis = alt.Y("score:Q", title="", scale=alt.Scale(domain=domain))
    bars = base.mark_bar(size=20).encode(
        y=score_axis,
        color=alt.Color('class:N', legend=None),
        tooltip=['pre_trained_model', 'class', 'score', 'epoch', 'train_data_creator'])
    return bars.interactive()

# Arguments for the pd.melt call in prepare_data_for_metrics_graphs:
#   id_vars    - run settings repeated on every output row,
#   value_vars - metric columns unpivoted into rows,
#   var_name   - name of the column that holds the metric name.
df_f1_scores_id_vars = ['emotion', 'epoch', 'batch_size', 'Ir', 'eps', 'pre_trained_model', 'hidden_dropout_prob',
                        'attention_probs_dropout_prob', 'note', 'train_data_creator']
df_f1_scores_value_vars = ['train_loss', 'val_loss', 'F1_score_weighted', 'F1_score_macro', 'F1_score_micro']
# Must be a scalar: recent pandas releases reject a list-like `var_name` when
# the frame's columns are not a MultiIndex (this used to be ['line type']).
df_f1_scores_var_name = 'line type'


def prepare_data_for_metrics_graphs(df, best_model=False):
    """
    Reshape the per-epoch metrics into long form for plotting.

    df         -- metrics frame as produced by `train_model`.
    best_model -- when True, keep only epochs up to and including the
                  module-level `best_epoch_F1_score_macro`; otherwise keep
                  every epoch.

    Returns the melted frame: one row per (epoch, metric), with the metric
    name and value columns defined by the module-level `df_f1_scores_*`
    settings.
    """
    selected = df[df['epoch']<=best_epoch_F1_score_macro] if best_model else df.copy()
    return pd.melt(selected,
                   id_vars=df_f1_scores_id_vars,
                   value_vars=df_f1_scores_value_vars,
                   var_name=df_f1_scores_var_name)


def prepare_metrics_graphs(df_f1_scores_long, df_class_accuracy,domain,width,height,epochs):
    """
    Build the two evaluation charts of a trained model.

    df_f1_scores_long -- long-form metrics (see prepare_data_for_metrics_graphs).
    df_class_accuracy -- per-class accuracy frame (see accuracy_per_class);
                         must contain at least one row.
    domain, width, height -- y-axis range and chart size shared by both charts.
    epochs            -- forwarded to draw_F1_scores to choose points or lines.

    Returns (F1_scores_graph, prediction_accuracy_graph):
      1. F1 scores (macro, micro, weighted) with train and validation loss.
      2. Prediction accuracy of each individual class.
    """
    F1_scores_graph = draw_F1_scores(df_f1_scores_long,
                                     domain,
                                     width,
                                     height,
                                     "F1 scores (Macro, Micro, Weighted)",
                                     "Train loss, Validation loss",
                                     epochs)

    # Take the model name from the first row by position. The previous
    # `['pre_trained_model'][1]` was a label lookup, which raises KeyError when
    # the frame has a single row or an index that does not contain 1.
    pre_trained_model_name = df_class_accuracy['pre_trained_model'].iloc[0]

    prediction_accuracy_graph = draw_prediction_accuracy(df_class_accuracy,
                                                         domain,
                                                         width,
                                                         height,
                                                         "Accuracy Per Class",
                                                         "Pre-Trained Model: " \
                                                         + pre_trained_model_name)
    return F1_scores_graph, prediction_accuracy_graph

In [None]:
# pre_trained_model: 'trituenhantaoio/bert-base-vietnamese-uncased'

In [None]:
# Create the labelled review dataset: set_data_category_in_df adds the numeric
# 'label' column and the 'data_category' (train/val) column to train_review_df.
df = set_data_category_in_df(train_review_df) 
print("\ndf['label'].value_counts():")
print(df['label'].value_counts())

# Set BERT model parameters and BERT model evaluation dataframe values.
# These are module-level names: train_model, accuracy_per_class and the
# optimizer/scheduler helpers read several of them directly.
hidden_dropout_prob = 0.1
attention_probs_dropout_prob = 0.1
pre_trained_model = 'trituenhantaoio/bert-base-vietnamese-uncased'
batch_size = 16
epochs = 10
Ir = 1e-5   # learning rate
eps = 1e-8
train_data_provider = 'Yunhong He'
# Part of the checkpoint id before the '/', used in file names below.
model_type = pre_trained_model.split('/')[0]

# Create tokenizer
tokenizer = BertTokenizer.from_pretrained(pre_trained_model, do_lower_case=True)

# Build train and validation dataloaders. `dataloader_train` must exist before
# train_model runs, because train_model and setup_scheduler read it as a global.
dataloader_train, dataloader_validation = build_dataloader(df, batch_size)

# Build and train the BERT model. train_model returns the per-epoch metrics
# (eval_df) and the model as it is after the last epoch.
model = build_Bert_model(pre_trained_model, attention_probs_dropout_prob, hidden_dropout_prob)
eval_df, model = train_model(Ir, eps, epochs, batch_size, dataloader_validation)

# Save eval_df for building visualization later on
eval_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_NLP_Epoch{epochs}_train_data_provided_by_{train_data_provider}_eval_df.csv'
eval_df.to_csv(eval_df_path, index=False)

# Use dataloader_validation to generate predictions and true_vals with the
# model state after the last epoch.
# Create dataframe accuracy_per_class_df to store BERT model prediction accuracy of individual class.
_, predictions, true_vals = evaluate(dataloader_validation)
accuracy_per_class_df = accuracy_per_class(predictions, true_vals)
accuracy_per_class_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_accuracy_per_class_df.csv'
accuracy_per_class_df.to_csv(accuracy_per_class_df_path,index=False)

# Save the trained BERT model (weights after the last epoch).
model_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}.model'
torch.save(model.state_dict(),model_path )

# Find the epoch with the highest macro F1 score; on a tie the first matching
# row of eval_df (the earliest epoch) is taken.
best_epoch_F1_score_macro = eval_df[ eval_df['F1_score_macro']==max(eval_df['F1_score_macro'])]['epoch'].values[0]
print('best epoch for the best F1 score (macro): ',best_epoch_F1_score_macro)

# Record this run: append the eval_df path, the accuracy_per_class_df path, the
# trained model path and the best epoch to their module-level lists.
eval_df_path_list.append(eval_df_path)
accuracy_per_class_path_list.append(accuracy_per_class_df_path)
model_path_list.append(model_path)
best_epoch_F1_score_macro_list.append(best_epoch_F1_score_macro)


df['label'].value_counts():
2    5270
0    5270
1     269
Name: label, dtype: int64


Downloading vocab.txt:   0%|          | 0.00/203k [00:00<?, ?B/s]

Downloading special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

Downloading tokenizer_config.json:   0%|          | 0.00/238 [00:00<?, ?B/s]

Downloading config.json:   0%|          | 0.00/846 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/422M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at trituenhantaoio/bert-base-vietnamese-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


cuda


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/541 [00:00<?, ?it/s]


Epoch 1
Training loss: 0.11578427813204811
Validation loss: 0.03416203673687585
F1 Score (Weighted): 0.9916693768583458
F1 Score (macro): 0.9883368969764202
F1 Score (micro): 0.9916743755781684


Epoch 2:   0%|          | 0/541 [00:00<?, ?it/s]


Epoch 2
Training loss: 0.018812240907535485
Validation loss: 0.025111180869798237
F1 Score (Weighted): 0.9962925044191804
F1 Score (macro): 0.9914979348595717
F1 Score (micro): 0.996299722479186


Epoch 3:   0%|          | 0/541 [00:00<?, ?it/s]


Epoch 3
Training loss: 0.011766040721014547
Validation loss: 0.025047269418470988
F1 Score (Weighted): 0.9962925044191804
F1 Score (macro): 0.9914979348595717
F1 Score (micro): 0.996299722479186


Epoch 4:   0%|          | 0/541 [00:00<?, ?it/s]


Epoch 4
Training loss: 0.008682307043128308
Validation loss: 0.01784559497558375
F1 Score (Weighted): 0.9967558712515918
F1 Score (macro): 0.9889148673208007
F1 Score (micro): 0.9967622571692877


Epoch 5:   0%|          | 0/541 [00:00<?, ?it/s]


Epoch 5
Training loss: 0.004221136329302115
Validation loss: 0.026719512943306174
F1 Score (Weighted): 0.995830780543631
F1 Score (macro): 0.9882823416691208
F1 Score (micro): 0.9958371877890841


Epoch 6:   0%|          | 0/541 [00:00<?, ?it/s]


Epoch 6
Training loss: 0.004266473548112675
Validation loss: 0.029258986115725267
F1 Score (Weighted): 0.9962933277183168
F1 Score (macro): 0.9885986057398579
F1 Score (micro): 0.996299722479186


Epoch 7:   0%|          | 0/541 [00:00<?, ?it/s]


Epoch 7
Training loss: 0.0016304625732792575
Validation loss: 0.03268426779480732
F1 Score (Weighted): 0.9953682291032445
F1 Score (macro): 0.9879660746817346
F1 Score (micro): 0.9953746530989824


Epoch 8:   0%|          | 0/541 [00:00<?, ?it/s]


Epoch 8
Training loss: 0.0010433031910908402
Validation loss: 0.033986606378796164
F1 Score (Weighted): 0.995830780543631
F1 Score (macro): 0.9882823416691208
F1 Score (micro): 0.9958371877890841


Epoch 9:   0%|          | 0/541 [00:00<?, ?it/s]


Epoch 9
Training loss: 0.0008279684336630385
Validation loss: 0.025814107200082672
F1 Score (Weighted): 0.9962933277183168
F1 Score (macro): 0.9885986057398579
F1 Score (micro): 0.996299722479186


Epoch 10:   0%|          | 0/541 [00:00<?, ?it/s]


Epoch 10
Training loss: 0.0005913600667460276
Validation loss: 0.030158017050740348
F1 Score (Weighted): 0.9962933277183168
F1 Score (macro): 0.9885986057398579
F1 Score (micro): 0.996299722479186
Prediction accuracy for individual class:
Class: negative
Accuracy: 1054/1054,  1.000 

Class: neutral
Accuracy: 52/54,  0.963 

Class: positive
Accuracy: 1048/1054,  0.994 

best epoch for the best F1 score (macro):  2


In [None]:
# Create dataframe model_df listing the saved artifacts of every trained model
# (also written to model_info.csv by prepare_model_df).
model_df = prepare_model_df(eval_df_path_list, accuracy_per_class_path_list, model_path_list)

# Set width, height and y-axis domain for the BERT model evaluation graphs.
width, height, domain = 200, 200, (0,1)

# Generate df_f1_scores_long: the long form of eval_df (F1 macro/micro/weighted,
# train and validation loss) covering every epoch.
df_f1_scores_long = prepare_data_for_metrics_graphs(eval_df, best_model=False)

# Predict emotions for the sample customer reviews with the current model.
predict_emotion_test()

"""
Generate two graphs for BERT model evaluation metrics. 
1. F1_scores_graph: Trained BERT model F1 scores(macro, micro and weighted), train and validation losses. 
2. prediction_accuracy_graph: Trained BERT model prediction accuracy of individual classes.
"""


F1_scores_graph, prediction_accuracy_graph = prepare_metrics_graphs(df_f1_scores_long, 
                                                                    accuracy_per_class_df,
                                                                    domain, 
                                                                    width, 
                                                                    height, 
                                                                    epochs)
print("")   
# Last expression of the cell: renders the F1 / loss chart.
F1_scores_graph

 
Predict emotion for 'Sản phẩm rất tốt, và mạnh mẽ':  positive
Predict emotion for 'Tôi cần ai đó hỗ trợ tôi cách sử dụng':  positive
Predict emotion for 'giao hàng quá chậm':  negative



In [None]:
# Display the per-class prediction accuracy graph returned by prepare_metrics_graphs in the previous cell.
prediction_accuracy_graph

In [None]:
# 'trituenhantaoio/bert-base-vietnamese-uncased' Bert Model with best F1 score macro

In [None]:
# Re-create the classifier from the pre-trained checkpoint; its weights are replaced below.
model = build_Bert_model(
    pre_trained_model,
    attention_probs_dropout_prob,
    hidden_dropout_prob,
)

# Place the model on the active device (`device` is defined outside this cell).
model = model.to(device)

# File name of the checkpoint for the epoch with the highest macro F1 score.
# NOTE(review): this relative file is presumably written once per epoch by train_model — confirm.
epoch = best_epoch_F1_score_macro
best_model_path = f'emotion_{model_type}_NLP_{epoch}.model'

# Read that checkpoint onto the CPU and copy its weights into the model.
model.load_state_dict(
    torch.load(best_model_path, map_location=torch.device('cpu'))
)

# `epochs` is rebound to the best epoch here because it is interpolated into the Drive path.
# NOTE(review): any later cell that reads `epochs` sees the best epoch, not the training epoch count.
epochs = best_epoch_F1_score_macro
model_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}.model'
torch.save(model.state_dict(), model_path)

# Register the saved best-epoch model and expose the whole list as a model_df column.
# NOTE(review): re-running this cell appends the same path again; the list would then
# presumably be longer than model_df and the column assignment would fail — confirm.
best_model_path_list.append(model_path)
model_df['best_model_path'] = best_model_path_list

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at trituenhantaoio/bert-base-vietnamese-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Print the predicted emotion for the sample customer reviews.
predict_emotion_test()

# Long-form metrics frame for the graphs, this time requested with best_model=True.
df_f1_scores_long = prepare_data_for_metrics_graphs(eval_df, best_model=True)

# Run the validation dataloader through evaluate(); the first returned value is discarded.
# The predictions and true values are then turned into a per-class accuracy frame.
_, predictions, true_vals = evaluate(dataloader_validation)
accuracy_per_class_df = accuracy_per_class(predictions, true_vals)

# Write the per-class accuracy frame to Drive; `epochs` is part of the file name.
accuracy_per_class_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_accuracy_per_class_df.csv'
accuracy_per_class_df.to_csv(accuracy_per_class_df_path, index=False)

"""
Generate two graphs for BERT model evaluation metrics. 
1. F1_scores_graph: Trained BERT model F1 scores(macro, micro and weighted), train and validation losses. 
2. prediction_accuracy_graph: Trained BERT model prediction accuracy of individual classes.
"""

F1_scores_graph, prediction_accuracy_graph = prepare_metrics_graphs(
    df_f1_scores_long, accuracy_per_class_df, domain, width, height, epochs
)

# Blank line between the printed text output and the rendered graph.
print("")
F1_scores_graph

 
Predict emotion for 'Sản phẩm rất tốt, và mạnh mẽ':  positive
Predict emotion for 'Tôi cần ai đó hỗ trợ tôi cách sử dụng':  negative
Predict emotion for 'giao hàng quá chậm':  negative
Prediction accuracy for individual class:
Class: negative
Accuracy: 1054/1054,  1.000 

Class: neutral
Accuracy: 52/54,  0.963 

Class: positive
Accuracy: 1048/1054,  0.994 




In [None]:
# Display the per-class prediction accuracy graph returned by prepare_metrics_graphs in the previous cell.
prediction_accuracy_graph

In [None]:
# pre_trained_model: 'NlpHUST/vibert4news-base-cased'

In [None]:
# Create the labelled reviews dataset and show how many rows each label has.
df = set_data_category_in_df(train_review_df)
print("\ndf['label'].value_counts():", df['label'].value_counts(), sep="\n")

# Run configuration: checkpoint, dropout, optimiser settings, and the tag used in output file names.
pre_trained_model = 'NlpHUST/vibert4news-base-cased'
model_type = pre_trained_model.partition('/')[0]  # text before the first '/'
train_data_provider = 'Yunhong He'
hidden_dropout_prob = 0.1
attention_probs_dropout_prob = 0.1
batch_size = 16
epochs = 10
Ir = 1e-5  # presumably the learning rate ("lr"); passed as train_model's first argument
eps = 1e-8

# Create tokenizer.
# NOTE(review): the checkpoint name says "cased" but do_lower_case=True is requested, and the
# captured output warns about a tokenizer class mismatch — confirm this is intended.
tokenizer = BertTokenizer.from_pretrained(pre_trained_model, do_lower_case=True)

# Build the train and validation dataloaders.
dataloader_train, dataloader_validation = build_dataloader(df, batch_size)

# Build the BERT model, then train it; train_model returns the per-epoch metrics frame and the model.
# NOTE(review): train_model is not given `model` or `dataloader_train`, so it presumably reads them as globals.
model = build_Bert_model(pre_trained_model, attention_probs_dropout_prob, hidden_dropout_prob)
eval_df, model = train_model(Ir, eps, epochs, batch_size, dataloader_validation)

# Persist eval_df so the visualisations can be rebuilt later.
eval_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_NLP_Epoch{epochs}_train_data_provided_by_{train_data_provider}_eval_df.csv'
eval_df.to_csv(eval_df_path, index=False)

# Evaluate on the validation dataloader and store the per-class prediction accuracy.
_, predictions, true_vals = evaluate(dataloader_validation)
accuracy_per_class_df = accuracy_per_class(predictions, true_vals)
accuracy_per_class_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_accuracy_per_class_df.csv'
accuracy_per_class_df.to_csv(accuracy_per_class_df_path, index=False)

# Save the weights of the model returned by train_model.
model_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}.model'
torch.save(model.state_dict(), model_path)

# Epoch of the first row whose macro F1 equals the highest macro F1 in eval_df.
f1_macro_scores = eval_df['F1_score_macro']
best_epoch_F1_score_macro = eval_df.loc[f1_macro_scores == max(f1_macro_scores), 'epoch'].values[0]
print('best epoch for the best F1 score (macro): ',best_epoch_F1_score_macro)

# Register this run: eval_df path, per-class accuracy path, model path,
# and the best epoch for the highest macro F1 score.
eval_df_path_list.append(eval_df_path)
accuracy_per_class_path_list.append(accuracy_per_class_df_path)
model_path_list.append(model_path)
best_epoch_F1_score_macro_list.append(best_epoch_F1_score_macro)


df['label'].value_counts():
2    5270
0    5270
1     269
Name: label, dtype: int64


Downloading vocab.txt:   0%|          | 0.00/402k [00:00<?, ?B/s]

Downloading tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading config.json:   0%|          | 0.00/551 [00:00<?, ?B/s]

The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'RobertaTokenizer'. 
The class this function is called from is 'BertTokenizer'.


Downloading pytorch_model.bin:   0%|          | 0.00/512M [00:00<?, ?B/s]

Some weights of the model checkpoint at NlpHUST/vibert4news-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

cuda


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/541 [00:00<?, ?it/s]


Epoch 1
Training loss: 0.17288782014257606
Validation loss: 0.04026971531857271
F1 Score (Weighted): 0.9930560770517879
F1 Score (macro): 0.9892850455590333
F1 Score (micro): 0.9930619796484736


Epoch 2:   0%|          | 0/541 [00:00<?, ?it/s]


Epoch 2
Training loss: 0.039072342111904246
Validation loss: 0.04117442979152068
F1 Score (Weighted): 0.9930553860641949
F1 Score (macro): 0.9863846810541489
F1 Score (micro): 0.9930619796484736


Epoch 3:   0%|          | 0/541 [00:00<?, ?it/s]


Epoch 3
Training loss: 0.01857954144581994
Validation loss: 0.021715964876955908
F1 Score (Weighted): 0.9967548303966522
F1 Score (macro): 0.9918140476879381
F1 Score (micro): 0.9967622571692877


Epoch 4:   0%|          | 0/541 [00:00<?, ?it/s]


Epoch 4
Training loss: 0.010982686055463941
Validation loss: 0.025312198963654414
F1 Score (Weighted): 0.9962925044191804
F1 Score (macro): 0.9914979348595717
F1 Score (micro): 0.996299722479186


Epoch 5:   0%|          | 0/541 [00:00<?, ?it/s]


Epoch 5
Training loss: 0.008712384951959494
Validation loss: 0.06303754886539537
F1 Score (Weighted): 0.9912065207488547
F1 Score (macro): 0.9880204216732201
F1 Score (micro): 0.9912118408880666


Epoch 6:   0%|          | 0/541 [00:00<?, ?it/s]


Epoch 6
Training loss: 0.004138203468338897
Validation loss: 0.01591679056381883
F1 Score (Weighted): 0.9976794729967746
F1 Score (macro): 0.9924462669483635
F1 Score (micro): 0.9976873265494912


Epoch 7:   0%|          | 0/541 [00:00<?, ?it/s]


Epoch 7
Training loss: 0.0009714147915933263
Validation loss: 0.02760768890964434
F1 Score (Weighted): 0.9967548303966522
F1 Score (macro): 0.9918140476879381
F1 Score (micro): 0.9967622571692877


Epoch 8:   0%|          | 0/541 [00:00<?, ?it/s]


Epoch 8
Training loss: 0.0008196807307815339
Validation loss: 0.023995352078306015
F1 Score (Weighted): 0.9967548303966522
F1 Score (macro): 0.9918140476879381
F1 Score (micro): 0.9967622571692877


Epoch 9:   0%|          | 0/541 [00:00<?, ?it/s]


Epoch 9
Training loss: 0.0016225322425252368
Validation loss: 0.030203295690251465
F1 Score (Weighted): 0.9967548303966522
F1 Score (macro): 0.9918140476879381
F1 Score (micro): 0.9967622571692877


Epoch 10:   0%|          | 0/541 [00:00<?, ?it/s]


Epoch 10
Training loss: 0.000689035127936908
Validation loss: 0.02881198733265431
F1 Score (Weighted): 0.9967548303966522
F1 Score (macro): 0.9918140476879381
F1 Score (micro): 0.9967622571692877
Prediction accuracy for individual class:
Class: negative
Accuracy: 1054/1054,  1.000 

Class: neutral
Accuracy: 52/54,  0.963 

Class: positive
Accuracy: 1049/1054,  0.995 

best epoch for the best F1 score (macro):  6


In [None]:
# Build model_df from the accumulated eval_df / per-class accuracy / model path lists.
model_df = prepare_model_df(
    eval_df_path_list,
    accuracy_per_class_path_list,
    model_path_list,
)

# Geometry and value domain handed to prepare_metrics_graphs below.
width = 200
height = 200
domain = (0, 1)

# Long-form metrics frame (F1 macro/micro/weighted, train and validation losses)
# for the whole training run, i.e. not restricted to the best model.
df_f1_scores_long = prepare_data_for_metrics_graphs(eval_df, best_model=False)

# Print the predicted emotion for the sample customer reviews.
predict_emotion_test()

"""
Generate two graphs for BERT model evaluation metrics. 
1. F1_scores_graph: Trained BERT model F1 scores(macro, micro and weighted), train and validation losses. 
2. prediction_accuracy_graph: Trained BERT model prediction accuracy of individual classes.
"""

F1_scores_graph, prediction_accuracy_graph = prepare_metrics_graphs(
    df_f1_scores_long, accuracy_per_class_df, domain, width, height, epochs
)

# Blank line between the printed predictions and the rendered graph.
print("")
F1_scores_graph

 
Predict emotion for 'Sản phẩm rất tốt, và mạnh mẽ':  positive
Predict emotion for 'Tôi cần ai đó hỗ trợ tôi cách sử dụng':  positive
Predict emotion for 'giao hàng quá chậm':  negative



In [None]:
# Display the per-class prediction accuracy graph returned by prepare_metrics_graphs in the previous cell.
prediction_accuracy_graph

In [None]:
# 'NlpHUST/vibert4news-base-cased' Bert Model with best F1 score macro

In [None]:
# Re-create the classifier from the pre-trained checkpoint; its weights are replaced below.
model = build_Bert_model(
    pre_trained_model,
    attention_probs_dropout_prob,
    hidden_dropout_prob,
)

# Place the model on the active device (`device` is defined outside this cell).
model = model.to(device)

# File name of the checkpoint for the epoch with the highest macro F1 score.
# NOTE(review): this relative file is presumably written once per epoch by train_model — confirm.
epoch = best_epoch_F1_score_macro
best_model_path = f'emotion_{model_type}_NLP_{epoch}.model'

# Read that checkpoint onto the CPU and copy its weights into the model.
model.load_state_dict(
    torch.load(best_model_path, map_location=torch.device('cpu'))
)

# `epochs` is rebound to the best epoch here because it is interpolated into the Drive path.
# NOTE(review): any later cell that reads `epochs` sees the best epoch, not the training epoch count.
epochs = best_epoch_F1_score_macro
model_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}.model'
torch.save(model.state_dict(), model_path)

# Register the saved best-epoch model and expose the whole list as a model_df column.
# NOTE(review): re-running this cell appends the same path again; the list would then
# presumably be longer than model_df and the column assignment would fail — confirm.
best_model_path_list.append(model_path)
model_df['best_model_path'] = best_model_path_list

Some weights of the model checkpoint at NlpHUST/vibert4news-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

In [None]:
# Print the predicted emotion for the sample customer reviews.
predict_emotion_test()

# Long-form metrics frame for the graphs, this time requested with best_model=True.
df_f1_scores_long = prepare_data_for_metrics_graphs(eval_df, best_model=True)

# Run the validation dataloader through evaluate(); the first returned value is discarded.
# The predictions and true values are then turned into a per-class accuracy frame.
_, predictions, true_vals = evaluate(dataloader_validation)
accuracy_per_class_df = accuracy_per_class(predictions, true_vals)

# Write the per-class accuracy frame to Drive; `epochs` is part of the file name.
accuracy_per_class_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_accuracy_per_class_df.csv'
accuracy_per_class_df.to_csv(accuracy_per_class_df_path, index=False)

"""
Generate two graphs for BERT model evaluation metrics. 
1. F1_scores_graph: Trained BERT model F1 scores(macro, micro and weighted), train and validation losses. 
2. prediction_accuracy_graph: Trained BERT model prediction accuracy of individual classes.
"""

F1_scores_graph, prediction_accuracy_graph = prepare_metrics_graphs(
    df_f1_scores_long, accuracy_per_class_df, domain, width, height, epochs
)

# Blank line between the printed text output and the rendered graph.
print("")
F1_scores_graph

 
Predict emotion for 'Sản phẩm rất tốt, và mạnh mẽ':  positive
Predict emotion for 'Tôi cần ai đó hỗ trợ tôi cách sử dụng':  positive
Predict emotion for 'giao hàng quá chậm':  negative
Prediction accuracy for individual class:
Class: negative
Accuracy: 1054/1054,  1.000 

Class: neutral
Accuracy: 52/54,  0.963 

Class: positive
Accuracy: 1051/1054,  0.997 




In [None]:
# Display the per-class prediction accuracy graph returned by prepare_metrics_graphs in the previous cell.
prediction_accuracy_graph

In [None]:
# pre_trained_model: 'bert-base-uncased'

In [None]:
# Create the labelled reviews dataset and show how many rows each label has.
df = set_data_category_in_df(train_review_df)
print("\ndf['label'].value_counts():", df['label'].value_counts(), sep="\n")

# Run configuration: checkpoint, dropout, optimiser settings, and the tag used in output file names.
pre_trained_model = 'bert-base-uncased'
model_type = pre_trained_model.partition('/')[0]  # no '/' in this name, so the whole string
train_data_provider = 'Yunhong He'
hidden_dropout_prob = 0.1
attention_probs_dropout_prob = 0.1
batch_size = 16
epochs = 10
Ir = 1e-5  # presumably the learning rate ("lr"); passed as train_model's first argument
eps = 1e-8

# Create tokenizer.
tokenizer = BertTokenizer.from_pretrained(pre_trained_model, do_lower_case=True)

# Build the train and validation dataloaders.
dataloader_train, dataloader_validation = build_dataloader(df, batch_size)

# Build the BERT model, then train it; train_model returns the per-epoch metrics frame and the model.
# NOTE(review): train_model is not given `model` or `dataloader_train`, so it presumably reads them as globals.
model = build_Bert_model(pre_trained_model, attention_probs_dropout_prob, hidden_dropout_prob)
eval_df, model = train_model(Ir, eps, epochs, batch_size, dataloader_validation)

# Persist eval_df so the visualisations can be rebuilt later.
eval_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_NLP_Epoch{epochs}_train_data_provided_by_{train_data_provider}_eval_df.csv'
eval_df.to_csv(eval_df_path, index=False)

# Evaluate on the validation dataloader and store the per-class prediction accuracy.
_, predictions, true_vals = evaluate(dataloader_validation)
accuracy_per_class_df = accuracy_per_class(predictions, true_vals)
accuracy_per_class_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_accuracy_per_class_df.csv'
accuracy_per_class_df.to_csv(accuracy_per_class_df_path, index=False)

# Save the weights of the model returned by train_model.
model_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}.model'
torch.save(model.state_dict(), model_path)

# Epoch of the first row whose macro F1 equals the highest macro F1 in eval_df.
f1_macro_scores = eval_df['F1_score_macro']
best_epoch_F1_score_macro = eval_df.loc[f1_macro_scores == max(f1_macro_scores), 'epoch'].values[0]
print('best epoch for the best F1 score (macro): ',best_epoch_F1_score_macro)

# Register this run: eval_df path, per-class accuracy path, model path,
# and the best epoch for the highest macro F1 score.
eval_df_path_list.append(eval_df_path)
accuracy_per_class_path_list.append(accuracy_per_class_df_path)
model_path_list.append(model_path)
best_epoch_F1_score_macro_list.append(best_epoch_F1_score_macro)


df['label'].value_counts():
2    5270
0    5270
1     269
Name: label, dtype: int64


Downloading vocab.txt:   0%|          | 0.00/226k [00:00<?, ?B/s]

Downloading tokenizer_config.json:   0%|          | 0.00/28.0 [00:00<?, ?B/s]

Downloading config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/420M [00:00<?, ?B/s]

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

cuda


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/541 [00:00<?, ?it/s]


Epoch 1
Training loss: 0.2472123701556615
Validation loss: 0.0298171011313303
F1 Score (Weighted): 0.9953678399909647
F1 Score (macro): 0.9908657006743086
F1 Score (micro): 0.9953746530989824


Epoch 2:   0%|          | 0/541 [00:00<?, ?it/s]


Epoch 2
Training loss: 0.04231492990798742
Validation loss: 0.04309017605106312
F1 Score (Weighted): 0.9930560770517879
F1 Score (macro): 0.9892850455590333
F1 Score (micro): 0.9930619796484736


Epoch 3:   0%|          | 0/541 [00:00<?, ?it/s]


Epoch 3
Training loss: 0.018972404430492983
Validation loss: 0.04106773336067871
F1 Score (Weighted): 0.9944431547735713
F1 Score (macro): 0.99023345227456
F1 Score (micro): 0.9944495837187789


Epoch 4:   0%|          | 0/541 [00:00<?, ?it/s]


Epoch 4
Training loss: 0.01536244838142401
Validation loss: 0.020295823897921603
F1 Score (Weighted): 0.9967548303966522
F1 Score (macro): 0.9918140476879381
F1 Score (micro): 0.9967622571692877


Epoch 5:   0%|          | 0/541 [00:00<?, ?it/s]


Epoch 5
Training loss: 0.012501766453326039
Validation loss: 0.052341626535374365
F1 Score (Weighted): 0.9921313180286996
F1 Score (macro): 0.9886527466950495
F1 Score (micro): 0.9921369102682701


Epoch 6:   0%|          | 0/541 [00:00<?, ?it/s]


Epoch 6
Training loss: 0.005874748469438286
Validation loss: 0.032374679949177386
F1 Score (Weighted): 0.9939808028095408
F1 Score (macro): 0.989917321678028
F1 Score (micro): 0.9939870490286772


Epoch 7:   0%|          | 0/541 [00:00<?, ?it/s]


Epoch 7
Training loss: 0.004419718818408101
Validation loss: 0.03242795869618931
F1 Score (Weighted): 0.9953678399909647
F1 Score (macro): 0.9908657006743086
F1 Score (micro): 0.9953746530989824


Epoch 8:   0%|          | 0/541 [00:00<?, ?it/s]


Epoch 8
Training loss: 0.00292848790458351
Validation loss: 0.02376090146400897
F1 Score (Weighted): 0.9962925044191804
F1 Score (macro): 0.9914979348595717
F1 Score (micro): 0.996299722479186


Epoch 9:   0%|          | 0/541 [00:00<?, ?it/s]


Epoch 9
Training loss: 0.0016679144544455278
Validation loss: 0.030838083504209756
F1 Score (Weighted): 0.9958301744918534
F1 Score (macro): 0.9911818193305139
F1 Score (micro): 0.9958371877890841


Epoch 10:   0%|          | 0/541 [00:00<?, ?it/s]


Epoch 10
Training loss: 0.0016202792436419858
Validation loss: 0.028685754020149463
F1 Score (Weighted): 0.9967548303966522
F1 Score (macro): 0.9918140476879381
F1 Score (micro): 0.9967622571692877
Prediction accuracy for individual class:
Class: negative
Accuracy: 1054/1054,  1.000 

Class: neutral
Accuracy: 52/54,  0.963 

Class: positive
Accuracy: 1049/1054,  0.995 

best epoch for the best F1 score (macro):  4


In [None]:
# Build model_df from the accumulated eval_df / per-class accuracy / model path lists.
model_df = prepare_model_df(
    eval_df_path_list,
    accuracy_per_class_path_list,
    model_path_list,
)

# Geometry and value domain handed to prepare_metrics_graphs below.
width = 200
height = 200
domain = (0, 1)

# Long-form metrics frame (F1 macro/micro/weighted, train and validation losses)
# for the whole training run, i.e. not restricted to the best model.
df_f1_scores_long = prepare_data_for_metrics_graphs(eval_df, best_model=False)

# Print the predicted emotion for the sample customer reviews.
predict_emotion_test()

"""
Generate two graphs for BERT model evaluation metrics. 
1. F1_scores_graph: Trained BERT model F1 scores(macro, micro and weighted), train and validation losses. 
2. prediction_accuracy_graph: Trained BERT model prediction accuracy of individual classes.
"""

F1_scores_graph, prediction_accuracy_graph = prepare_metrics_graphs(
    df_f1_scores_long, accuracy_per_class_df, domain, width, height, epochs
)

# Blank line between the printed predictions and the rendered graph.
print("")
F1_scores_graph

 
Predict emotion for 'Sản phẩm rất tốt, và mạnh mẽ':  positive
Predict emotion for 'Tôi cần ai đó hỗ trợ tôi cách sử dụng':  negative
Predict emotion for 'giao hàng quá chậm':  negative



In [None]:
# Display the per-class prediction accuracy graph returned by prepare_metrics_graphs in the previous cell.
prediction_accuracy_graph

In [None]:
# 'bert-base-uncased' Bert Model with best F1 score macro

In [None]:
# Re-create the classifier from the pre-trained checkpoint; its weights are replaced below.
model = build_Bert_model(
    pre_trained_model,
    attention_probs_dropout_prob,
    hidden_dropout_prob,
)

# Place the model on the active device (`device` is defined outside this cell).
model = model.to(device)

# File name of the checkpoint for the epoch with the highest macro F1 score.
# NOTE(review): this relative file is presumably written once per epoch by train_model — confirm.
epoch = best_epoch_F1_score_macro
best_model_path = f'emotion_{model_type}_NLP_{epoch}.model'

# Read that checkpoint onto the CPU and copy its weights into the model.
model.load_state_dict(
    torch.load(best_model_path, map_location=torch.device('cpu'))
)

# `epochs` is rebound to the best epoch here because it is interpolated into the Drive path.
# NOTE(review): any later cell that reads `epochs` sees the best epoch, not the training epoch count.
epochs = best_epoch_F1_score_macro
model_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}.model'
torch.save(model.state_dict(), model_path)

# Register the saved best-epoch model and expose the whole list as a model_df column.
# NOTE(review): re-running this cell appends the same path again; the list would then
# presumably be longer than model_df and the column assignment would fail — confirm.
best_model_path_list.append(model_path)
model_df['best_model_path'] = best_model_path_list

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [None]:
# Print the predicted emotion for the sample customer reviews.
predict_emotion_test()

# Long-form metrics frame for the graphs, this time requested with best_model=True.
df_f1_scores_long = prepare_data_for_metrics_graphs(eval_df, best_model=True)

# Run the validation dataloader through evaluate(); the first returned value is discarded.
# The predictions and true values are then turned into a per-class accuracy frame.
_, predictions, true_vals = evaluate(dataloader_validation)
accuracy_per_class_df = accuracy_per_class(predictions, true_vals)

# Write the per-class accuracy frame to Drive; `epochs` is part of the file name.
accuracy_per_class_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_accuracy_per_class_df.csv'
accuracy_per_class_df.to_csv(accuracy_per_class_df_path, index=False)

"""
Generate two graphs for BERT model evaluation metrics. 
1. F1_scores_graph: Trained BERT model F1 scores(macro, micro and weighted), train and validation losses. 
2. prediction_accuracy_graph: Trained BERT model prediction accuracy of individual classes.
"""

F1_scores_graph, prediction_accuracy_graph = prepare_metrics_graphs(
    df_f1_scores_long, accuracy_per_class_df, domain, width, height, epochs
)

# Blank line between the printed text output and the rendered graph.
print("")
F1_scores_graph

 
Predict emotion for 'Sản phẩm rất tốt, và mạnh mẽ':  positive
Predict emotion for 'Tôi cần ai đó hỗ trợ tôi cách sử dụng':  negative
Predict emotion for 'giao hàng quá chậm':  negative
Prediction accuracy for individual class:
Class: negative
Accuracy: 1054/1054,  1.000 

Class: neutral
Accuracy: 52/54,  0.963 

Class: positive
Accuracy: 1049/1054,  0.995 




In [None]:
# Display the per-class prediction accuracy graph returned by prepare_metrics_graphs in the previous cell.
prediction_accuracy_graph

In [None]:
# The training data below was provided by Suwasit's Vietnamese team.

In [None]:
# Load the Lazada product reviews spreadsheet and reduce it to the columns used for training:
# 'index', 'Vietnamese' (the review text), 'Rating' and 'emotion'.
# The path is absolute, matching the '/content/drive/...' paths used by the rest of the notebook,
# so the cell no longer depends on the current working directory.
reviews_xlsx_path = '/content/drive/MyDrive/Realtime Dreamer/Tefal Lazada Product Reviews in TTL202207_Updated_Good_Bad.xlsx'

# Mapping from the spreadsheet's 'Comment classified Type 1' values to emotion names.
emotion_dict = {'Good': 'positive', 'Bad': 'negative'}

train_review_df_s = (
    pd.read_excel(reviews_xlsx_path, engine='openpyxl', sheet_name='Database_LZD')
    .rename(columns={'Review Content': 'Vietnamese'})
    # reset_index() materialises the row position as the 'index' column selected below.
    .reset_index()
    # Map Good/Bad to positive/negative; rows with no classification become 'neutral'.
    # Assigning the filled column (instead of fillna(..., inplace=True) on a column of a
    # sliced frame) avoids chained assignment, which is a silent no-op under pandas
    # copy-on-write.
    .assign(
        emotion=lambda frame: frame['Comment classified Type 1']
        .replace(emotion_dict)
        .fillna('neutral')
    )
    # Keep only rows that have review text, and only the four training columns,
    # in a single .loc call rather than chained [cols][mask] indexing.
    .loc[
        lambda frame: frame['Vietnamese'].notna(),
        ['index', 'Vietnamese', 'Rating', 'emotion'],
    ]
)
print(train_review_df_s.shape)

(22771, 4)


In [None]:
# pre_trained_model: 'trituenhantaoio/bert-base-vietnamese-uncased'

In [None]:
# Create the labelled reviews dataset from the data provided by Suwasit's Vietnamese team
# and show how many rows each label has.
# NOTE(review): this rebinds the global `train_review_df` to a copy of the Suwasit data.
train_review_df = train_review_df_s.copy()
labelled_review_df = add_label_to_df(train_review_df)
df = set_data_category_in_df(labelled_review_df)
print("\ndf['label'].value_counts():", df['label'].value_counts(), sep="\n")

# Run configuration: checkpoint, dropout, optimiser settings, and the tag used in output file names.
pre_trained_model = 'trituenhantaoio/bert-base-vietnamese-uncased'
model_type = pre_trained_model.partition('/')[0]  # text before the first '/'
train_data_provider = 'Suwasit'
hidden_dropout_prob = 0.1
attention_probs_dropout_prob = 0.1
batch_size = 16
epochs = 10
Ir = 1e-5  # presumably the learning rate ("lr"); passed as train_model's first argument
eps = 1e-8

# Create tokenizer.
tokenizer = BertTokenizer.from_pretrained(pre_trained_model, do_lower_case=True)

# Build the train and validation dataloaders.
dataloader_train, dataloader_validation = build_dataloader(df, batch_size)

# Build the BERT model, then train it; train_model returns the per-epoch metrics frame and the model.
# NOTE(review): train_model is not given `model` or `dataloader_train`, so it presumably reads them as globals.
model = build_Bert_model(pre_trained_model, attention_probs_dropout_prob, hidden_dropout_prob)
eval_df, model = train_model(Ir, eps, epochs, batch_size, dataloader_validation)

# Persist eval_df so the visualisations can be rebuilt later.
eval_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_NLP_Epoch{epochs}_train_data_provided_by_{train_data_provider}_eval_df.csv'
eval_df.to_csv(eval_df_path, index=False)

# Evaluate on the validation dataloader and store the per-class prediction accuracy.
_, predictions, true_vals = evaluate(dataloader_validation)
accuracy_per_class_df = accuracy_per_class(predictions, true_vals)
accuracy_per_class_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_accuracy_per_class_df.csv'
accuracy_per_class_df.to_csv(accuracy_per_class_df_path, index=False)

# Save the weights of the model returned by train_model.
model_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}.model'
torch.save(model.state_dict(), model_path)

# Epoch of the first row whose macro F1 equals the highest macro F1 in eval_df.
f1_macro_scores = eval_df['F1_score_macro']
best_epoch_F1_score_macro = eval_df.loc[f1_macro_scores == max(f1_macro_scores), 'epoch'].values[0]
print('best epoch for the best F1 score (macro): ',best_epoch_F1_score_macro)

# Register this run: eval_df path, per-class accuracy path, model path,
# and the best epoch for the highest macro F1 score.
eval_df_path_list.append(eval_df_path)
accuracy_per_class_path_list.append(accuracy_per_class_df_path)
model_path_list.append(model_path)
best_epoch_F1_score_macro_list.append(best_epoch_F1_score_macro)


df['label'].value_counts():
2    11271
1    10641
0      859
Name: label, dtype: int64


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at trituenhantaoio/bert-base-vietnamese-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


cuda


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 1
Training loss: 0.21369946906809173
Validation loss: 0.1691588973258867
F1 Score (Weighted): 0.9456042589084563
F1 Score (macro): 0.825345856290201
F1 Score (micro): 0.9477497255762898


Epoch 2:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 2
Training loss: 0.1447067579788649
Validation loss: 0.16406048326630537
F1 Score (Weighted): 0.9447237589896689
F1 Score (macro): 0.8292082582313208
F1 Score (micro): 0.9466520307354556


Epoch 3:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 3
Training loss: 0.10869327558388404
Validation loss: 0.1945464041367941
F1 Score (Weighted): 0.9472021413259363
F1 Score (macro): 0.8436070258660932
F1 Score (micro): 0.9464324917672888


Epoch 4:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 4
Training loss: 0.07596091603620274
Validation loss: 0.24295766021956264
F1 Score (Weighted): 0.9479838284137985
F1 Score (macro): 0.8471369782489059
F1 Score (micro): 0.9470911086717893


Epoch 5:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 5
Training loss: 0.05090953236362178
Validation loss: 0.29052251357602554
F1 Score (Weighted): 0.9448434371995753
F1 Score (macro): 0.8423137128310043
F1 Score (micro): 0.9433589462129529


Epoch 6:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 6
Training loss: 0.0378724798728007
Validation loss: 0.3005342236197433
F1 Score (Weighted): 0.9471447880778566
F1 Score (macro): 0.8396867330024822
F1 Score (micro): 0.9470911086717893


Epoch 7:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 7
Training loss: 0.0274781659456711
Validation loss: 0.33840580873286835
F1 Score (Weighted): 0.9464360424572114
F1 Score (macro): 0.8426659083443729
F1 Score (micro): 0.9451152579582875


Epoch 8:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 8
Training loss: 0.021539087464134753
Validation loss: 0.3444504856010322
F1 Score (Weighted): 0.9470474712028093
F1 Score (macro): 0.8375755813897056
F1 Score (micro): 0.9464324917672888


Epoch 9:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 9
Training loss: 0.018447466272793513
Validation loss: 0.3458920901615082
F1 Score (Weighted): 0.949731872228432
F1 Score (macro): 0.8476246232811158
F1 Score (micro): 0.9495060373216246


Epoch 10:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 10
Training loss: 0.012971140046525304
Validation loss: 0.3550109547364723
F1 Score (Weighted): 0.948781246495843
F1 Score (macro): 0.8460166647007682
F1 Score (micro): 0.948847420417124
Prediction accuracy for individual class:
Class: negative
Accuracy: 105/172,  0.610 

Class: neutral
Accuracy: 2047/2128,  0.962 

Class: positive
Accuracy: 2170/2255,  0.962 

best epoch for the best F1 score (macro):  9


In [None]:
# Assemble model_df, the table of performance metrics of the trained BERT models,
# from the file paths recorded so far.
model_df = prepare_model_df(eval_df_path_list, accuracy_per_class_path_list, model_path_list)

# Width, height and domain used for the BERT model evaluation graphs.
width = 200
height = 200
domain = (0, 1)

# Long-form frame of the F1 scores (macro, micro and weighted) and the train /
# validation losses.
df_f1_scores_long = prepare_data_for_metrics_graphs(eval_df, best_model=False)

# Predict emotions for the sample customer reviews.
predict_emotion_test()

# Build the two evaluation charts:
#   1. F1_scores_graph: F1 scores (macro, micro and weighted), train and validation losses.
#   2. prediction_accuracy_graph: prediction accuracy of the individual classes.
F1_scores_graph, prediction_accuracy_graph = prepare_metrics_graphs(
    df_f1_scores_long, accuracy_per_class_df, domain, width, height, epochs
)
print("")
F1_scores_graph

 
Predict emotion for 'Sản phẩm rất tốt, và mạnh mẽ':  positive
Predict emotion for 'Tôi cần ai đó hỗ trợ tôi cách sử dụng':  neutral
Predict emotion for 'giao hàng quá chậm':  negative



In [None]:
# Display the per-class prediction-accuracy chart returned by prepare_metrics_graphs in the previous cell.
prediction_accuracy_graph

In [None]:
# 'trituenhantaoio/bert-base-vietnamese-uncased': BERT model from the epoch with the best macro F1 score

In [None]:
# Re-create the BERT model with the same checkpoint and dropout settings; its weights
# are replaced by the best-epoch checkpoint loaded below.
model = build_Bert_model(pre_trained_model,attention_probs_dropout_prob,hidden_dropout_prob)

# Move the model to the active device.
model.to(device)

# Path of the checkpoint for the epoch with the highest macro F1 score.
# NOTE(review): relative path; presumably train_model writes one
# 'emotion_{model_type}_NLP_{epoch}.model' file per epoch into the working directory — confirm.
epoch = best_epoch_F1_score_macro
best_model_path = f'emotion_{model_type}_NLP_{epoch}.model'

# Load the best-epoch weights. They are deserialised onto the CPU (map_location) and
# copied into the model's parameters by load_state_dict.
model.load_state_dict(torch.load(best_model_path, map_location=torch.device('cpu')))

# Save the best-epoch weights to Drive.
# NOTE: `epochs` is rebound to the best epoch here; the following cell relies on that
# value for its accuracy CSV file name and for prepare_metrics_graphs.
epochs = best_epoch_F1_score_macro
model_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}.model'
torch.save(model.state_dict(),model_path )

# Record the path of the best-epoch model.
# NOTE(review): not idempotent — re-running this cell appends the path again, and the
# column assignment below raises if the list length no longer matches model_df's row count.
best_model_path_list.append(model_path)

# Store best_model_path_list as the 'best_model_path' column of model_df.
model_df['best_model_path'] = best_model_path_list

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at trituenhantaoio/bert-base-vietnamese-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
# Predict emotions for the sample customer reviews with the best-epoch model.
predict_emotion_test()

# Long-form frame of the performance metrics for the best trained BERT model.
df_f1_scores_long = prepare_data_for_metrics_graphs(eval_df, best_model=True)

# Evaluate on the validation dataloader (the first returned value is not needed here)
# and compute the prediction accuracy of each individual class.
_, predictions, true_vals = evaluate(dataloader_validation)
accuracy_per_class_df = accuracy_per_class(predictions, true_vals)

# Save accuracy_per_class_df. `epochs` was rebound to the best epoch in the previous
# cell, so the file name carries that epoch number.
accuracy_per_class_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_accuracy_per_class_df.csv'
accuracy_per_class_df.to_csv(accuracy_per_class_df_path, index=False)

# Build the two evaluation charts:
#   1. F1_scores_graph: F1 scores (macro, micro and weighted), train and validation losses.
#   2. prediction_accuracy_graph: prediction accuracy of the individual classes.
F1_scores_graph, prediction_accuracy_graph = prepare_metrics_graphs(
    df_f1_scores_long, accuracy_per_class_df, domain, width, height, epochs
)
print("")
F1_scores_graph

 
Predict emotion for 'Sản phẩm rất tốt, và mạnh mẽ':  positive
Predict emotion for 'Tôi cần ai đó hỗ trợ tôi cách sử dụng':  neutral
Predict emotion for 'giao hàng quá chậm':  negative
Prediction accuracy for individual class:
Class: negative
Accuracy: 108/172,  0.628 

Class: neutral
Accuracy: 2055/2128,  0.966 

Class: positive
Accuracy: 2162/2255,  0.959 




In [None]:
# Display the per-class prediction-accuracy chart returned by prepare_metrics_graphs in the previous cell.
prediction_accuracy_graph

In [None]:
# pre_trained_model: 'NlpHUST/vibert4news-base-cased'

In [None]:
# Build the labelled review dataset (data provided by Suwasit's Vietnamese team)
# from a copy of train_review_df_s, then show how many rows each label has.
train_review_df = train_review_df_s.copy()
df = set_data_category_in_df(add_label_to_df(train_review_df))
print("\ndf['label'].value_counts():")
print(df['label'].value_counts())

# Set BERT model parameters and the values used in the output file names.
# NOTE(review): 'Ir' is spelled with a capital I; presumably it is the learning rate
# ('lr'). The name is kept because it is a notebook global.
hidden_dropout_prob = 0.1
attention_probs_dropout_prob = 0.1
pre_trained_model = 'NlpHUST/vibert4news-base-cased'
model_type = pre_trained_model.split('/')[0]
batch_size = 16
epochs = 10
Ir = 1e-5
eps = 1e-8
train_data_provider = 'Suwasit'

# Create the tokenizer. This checkpoint is *cased*, so the input must not be
# lower-cased: with do_lower_case=True (and strip_accents left unset) BertTokenizer
# lower-cases the text and also strips accents, which removes the Vietnamese
# diacritics the cased vocabulary depends on.
tokenizer = BertTokenizer.from_pretrained(pre_trained_model, do_lower_case=False)

# Build train and validation dataloaders.
dataloader_train, dataloader_validation = build_dataloader(df, batch_size)

# Build and train the BERT model. train_model returns the evaluation-metrics frame
# (eval_df) together with the model.
model = build_Bert_model(pre_trained_model, attention_probs_dropout_prob, hidden_dropout_prob)
eval_df, model = train_model(Ir, eps, epochs, batch_size, dataloader_validation)

# Save eval_df so the visualisation cells can reload it later.
eval_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_NLP_Epoch{epochs}_train_data_provided_by_{train_data_provider}_eval_df.csv'
eval_df.to_csv(eval_df_path, index=False)

# Evaluate on the validation dataloader (the first returned value is not needed here)
# and store the prediction accuracy of each individual class.
_, predictions, true_vals = evaluate(dataloader_validation)
accuracy_per_class_df = accuracy_per_class(predictions, true_vals)
accuracy_per_class_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_accuracy_per_class_df.csv'
accuracy_per_class_df.to_csv(accuracy_per_class_df_path,index=False)

# Save the weights of the trained BERT model.
model_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}.model'
torch.save(model.state_dict(),model_path )

# Epoch of the first row whose macro F1 score equals the maximum.
best_epoch_F1_score_macro = eval_df[ eval_df['F1_score_macro']==max(eval_df['F1_score_macro'])]['epoch'].values[0]
print('best epoch for the best F1 score (macro): ',best_epoch_F1_score_macro)

# Record this run in the notebook-wide bookkeeping lists: eval_df path, per-class
# accuracy path, trained model path, and the epoch with the highest macro F1 score.
eval_df_path_list.append(eval_df_path)
accuracy_per_class_path_list.append(accuracy_per_class_df_path)
model_path_list.append(model_path)
best_epoch_F1_score_macro_list.append(best_epoch_F1_score_macro)


df['label'].value_counts():
2    11271
1    10641
0      859
Name: label, dtype: int64


The tokenizer class you load from this checkpoint is not the same type as the class this function is called from. It may result in unexpected tokenization. 
The tokenizer class you load from this checkpoint is 'RobertaTokenizer'. 
The class this function is called from is 'BertTokenizer'.
Some weights of the model checkpoint at NlpHUST/vibert4news-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are ini

cuda


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 1
Training loss: 0.2350161786383726
Validation loss: 0.21264050946399374
F1 Score (Weighted): 0.9311143728330812
F1 Score (macro): 0.7675359069833911
F1 Score (micro): 0.9369923161361141


Epoch 2:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 2
Training loss: 0.16611633464909478
Validation loss: 0.1967341853855096
F1 Score (Weighted): 0.9402491505239527
F1 Score (macro): 0.8156068069861173
F1 Score (micro): 0.94006586169045


Epoch 3:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 3
Training loss: 0.13419579589474517
Validation loss: 0.20507805419783517
F1 Score (Weighted): 0.9455416772054563
F1 Score (macro): 0.8104262499036285
F1 Score (micro): 0.9495060373216246


Epoch 4:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 4
Training loss: 0.10718312170185824
Validation loss: 0.23961900829259927
F1 Score (Weighted): 0.9461291564203106
F1 Score (macro): 0.8297576357803632
F1 Score (micro): 0.9462129527991219


Epoch 5:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 5
Training loss: 0.08539957387218108
Validation loss: 0.2593415526319867
F1 Score (Weighted): 0.9417264244096957
F1 Score (macro): 0.832028414994415
F1 Score (micro): 0.9405049396267837


Epoch 6:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 6
Training loss: 0.05979778429738506
Validation loss: 0.2937393859116638
F1 Score (Weighted): 0.9436699064244881
F1 Score (macro): 0.8274088633455374
F1 Score (micro): 0.9448957189901207


Epoch 7:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 7
Training loss: 0.04740412844105244
Validation loss: 0.3128076931596233
F1 Score (Weighted): 0.9465179256882795
F1 Score (macro): 0.8345282892930636
F1 Score (micro): 0.9470911086717893


Epoch 8:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 8
Training loss: 0.03874460810626425
Validation loss: 0.3388578181413897
F1 Score (Weighted): 0.9458606059421749
F1 Score (macro): 0.8305592395323398
F1 Score (micro): 0.9475301866081229


Epoch 9:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 9
Training loss: 0.031183357598140768
Validation loss: 0.3704034609212581
F1 Score (Weighted): 0.9438056918439203
F1 Score (macro): 0.8264073553227572
F1 Score (micro): 0.9424807903402854


Epoch 10:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 10
Training loss: 0.02751415058256858
Validation loss: 0.3681941417679538
F1 Score (Weighted): 0.9451106500538333
F1 Score (macro): 0.8318403427908011
F1 Score (micro): 0.9446761800219539
Prediction accuracy for individual class:
Class: negative
Accuracy: 102/172,  0.593 

Class: neutral
Accuracy: 2021/2128,  0.950 

Class: positive
Accuracy: 2180/2255,  0.967 

best epoch for the best F1 score (macro):  7


In [None]:
# Assemble model_df, the table of performance metrics of the trained BERT models,
# from the file paths recorded so far.
model_df = prepare_model_df(eval_df_path_list, accuracy_per_class_path_list, model_path_list)

# Width, height and domain used for the BERT model evaluation graphs.
width = 200
height = 200
domain = (0, 1)

# Long-form frame of the F1 scores (macro, micro and weighted) and the train /
# validation losses.
df_f1_scores_long = prepare_data_for_metrics_graphs(eval_df, best_model=False)

# Predict emotions for the sample customer reviews.
predict_emotion_test()

# Build the two evaluation charts:
#   1. F1_scores_graph: F1 scores (macro, micro and weighted), train and validation losses.
#   2. prediction_accuracy_graph: prediction accuracy of the individual classes.
F1_scores_graph, prediction_accuracy_graph = prepare_metrics_graphs(
    df_f1_scores_long, accuracy_per_class_df, domain, width, height, epochs
)
print("")
F1_scores_graph

 
Predict emotion for 'Sản phẩm rất tốt, và mạnh mẽ':  positive
Predict emotion for 'Tôi cần ai đó hỗ trợ tôi cách sử dụng':  neutral
Predict emotion for 'giao hàng quá chậm':  negative



In [None]:
# Display the per-class prediction-accuracy chart returned by prepare_metrics_graphs in the previous cell.
prediction_accuracy_graph

In [None]:
# 'NlpHUST/vibert4news-base-cased': BERT model from the epoch with the best macro F1 score

In [None]:
# Re-create the BERT model with the same checkpoint and dropout settings; its weights
# are replaced by the best-epoch checkpoint loaded below.
model = build_Bert_model(pre_trained_model,attention_probs_dropout_prob,hidden_dropout_prob)

# Move the model to the active device.
model.to(device)

# Path of the checkpoint for the epoch with the highest macro F1 score.
# NOTE(review): relative path; presumably train_model writes one
# 'emotion_{model_type}_NLP_{epoch}.model' file per epoch into the working directory — confirm.
epoch = best_epoch_F1_score_macro
best_model_path = f'emotion_{model_type}_NLP_{epoch}.model'

# Load the best-epoch weights. They are deserialised onto the CPU (map_location) and
# copied into the model's parameters by load_state_dict.
model.load_state_dict(torch.load(best_model_path, map_location=torch.device('cpu')))

# Save the best-epoch weights to Drive.
# NOTE: `epochs` is rebound to the best epoch here; the following cell relies on that
# value for its accuracy CSV file name and for prepare_metrics_graphs.
epochs = best_epoch_F1_score_macro
model_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}.model'
torch.save(model.state_dict(),model_path )

# Record the path of the best-epoch model.
# NOTE(review): not idempotent — re-running this cell appends the path again, and the
# column assignment below raises if the list length no longer matches model_df's row count.
best_model_path_list.append(model_path)

# Store best_model_path_list as the 'best_model_path' column of model_df.
model_df['best_model_path'] = best_model_path_list

Some weights of the model checkpoint at NlpHUST/vibert4news-base-cased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

In [None]:
# Predict emotions for the sample customer reviews with the best-epoch model.
predict_emotion_test()

# Long-form frame of the performance metrics for the best trained BERT model.
df_f1_scores_long = prepare_data_for_metrics_graphs(eval_df, best_model=True)

# Evaluate on the validation dataloader (the first returned value is not needed here)
# and compute the prediction accuracy of each individual class.
_, predictions, true_vals = evaluate(dataloader_validation)
accuracy_per_class_df = accuracy_per_class(predictions, true_vals)

# Save accuracy_per_class_df. `epochs` was rebound to the best epoch in the previous
# cell, so the file name carries that epoch number.
accuracy_per_class_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_accuracy_per_class_df.csv'
accuracy_per_class_df.to_csv(accuracy_per_class_df_path, index=False)

# Build the two evaluation charts:
#   1. F1_scores_graph: F1 scores (macro, micro and weighted), train and validation losses.
#   2. prediction_accuracy_graph: prediction accuracy of the individual classes.
F1_scores_graph, prediction_accuracy_graph = prepare_metrics_graphs(
    df_f1_scores_long, accuracy_per_class_df, domain, width, height, epochs
)
print("")
F1_scores_graph

 
Predict emotion for 'Sản phẩm rất tốt, và mạnh mẽ':  positive
Predict emotion for 'Tôi cần ai đó hỗ trợ tôi cách sử dụng':  neutral
Predict emotion for 'giao hàng quá chậm':  negative
Prediction accuracy for individual class:
Class: negative
Accuracy: 96/172,  0.558 

Class: neutral
Accuracy: 2041/2128,  0.959 

Class: positive
Accuracy: 2177/2255,  0.965 




In [None]:
# Display the per-class prediction-accuracy chart returned by prepare_metrics_graphs in the previous cell.
prediction_accuracy_graph

In [None]:
# pre_trained_model: 'bert-base-uncased'

In [None]:
# Build the labelled review dataset (data provided by Suwasit's Vietnamese team)
# from a copy of train_review_df_s, then show how many rows each label has.
train_review_df = train_review_df_s.copy()
df = set_data_category_in_df(add_label_to_df(train_review_df))
print("\ndf['label'].value_counts():")
print(df['label'].value_counts())

# Checkpoint to fine-tune. model_type (the text before the first '/') and
# train_data_provider are only used to build the output file names below.
pre_trained_model = 'bert-base-uncased'
model_type = pre_trained_model.split('/')[0]
train_data_provider = 'Suwasit'

# Dropout settings handed to build_Bert_model.
hidden_dropout_prob = 0.1
attention_probs_dropout_prob = 0.1

# Settings handed to build_dataloader / train_model.
# NOTE(review): 'Ir' is spelled with a capital I; presumably it is the learning rate
# ('lr'). The name is kept because it is a notebook global.
batch_size = 16
epochs = 10
Ir = 1e-5
eps = 1e-8

# Tokenizer for the chosen checkpoint.
tokenizer = BertTokenizer.from_pretrained(pre_trained_model, do_lower_case=True)

# Train and validation dataloaders.
dataloader_train, dataloader_validation = build_dataloader(df, batch_size)

# Build the model and train it. train_model returns the evaluation-metrics frame
# (eval_df) together with the model.
model = build_Bert_model(pre_trained_model, attention_probs_dropout_prob, hidden_dropout_prob)
eval_df, model = train_model(Ir, eps, epochs, batch_size, dataloader_validation)

# Persist eval_df so the visualisation cells can reload it later.
eval_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_NLP_Epoch{epochs}_train_data_provided_by_{train_data_provider}_eval_df.csv'
eval_df.to_csv(eval_df_path, index=False)

# Evaluate on the validation dataloader (the first returned value is not needed here)
# and store the prediction accuracy of each individual class.
_, predictions, true_vals = evaluate(dataloader_validation)
accuracy_per_class_df = accuracy_per_class(predictions, true_vals)
accuracy_per_class_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_accuracy_per_class_df.csv'
accuracy_per_class_df.to_csv(accuracy_per_class_df_path, index=False)

# Save the weights of the trained model.
model_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}.model'
torch.save(model.state_dict(), model_path)

# Epoch of the first row whose macro F1 score equals the maximum.
macro_f1 = eval_df['F1_score_macro']
best_epoch_F1_score_macro = eval_df.loc[macro_f1 == macro_f1.max(), 'epoch'].values[0]
print('best epoch for the best F1 score (macro): ',best_epoch_F1_score_macro)

# Record this run in the notebook-wide bookkeeping lists (one entry per trained model).
for registry, entry in (
    (eval_df_path_list, eval_df_path),
    (accuracy_per_class_path_list, accuracy_per_class_df_path),
    (model_path_list, model_path),
    (best_epoch_F1_score_macro_list, best_epoch_F1_score_macro),
):
    registry.append(entry)


df['label'].value_counts():
2    11271
1    10641
0      859
Name: label, dtype: int64


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

cuda


  0%|          | 0/10 [00:00<?, ?it/s]

Epoch 1:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 1
Training loss: 0.26620329798487874
Validation loss: 0.19561704585345038
F1 Score (Weighted): 0.9352772478750514
F1 Score (macro): 0.8016067878216154
F1 Score (micro): 0.9334796926454446


Epoch 2:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 2
Training loss: 0.1756164333429449
Validation loss: 0.17744597566575465
F1 Score (Weighted): 0.9421269166428765
F1 Score (macro): 0.8184073563177855
F1 Score (micro): 0.944456641053787


Epoch 3:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 3
Training loss: 0.14896892744234638
Validation loss: 0.18147392053884176
F1 Score (Weighted): 0.9416086976626675
F1 Score (macro): 0.8104737206089402
F1 Score (micro): 0.9451152579582875


Epoch 4:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 4
Training loss: 0.12468914617370377
Validation loss: 0.22478400592686068
F1 Score (Weighted): 0.9445368407990705
F1 Score (macro): 0.8408525673390308
F1 Score (micro): 0.9435784851811196


Epoch 5:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 5
Training loss: 0.10959845689067918
Validation loss: 0.2455505373277048
F1 Score (Weighted): 0.9440241751721709
F1 Score (macro): 0.8284327999813946
F1 Score (micro): 0.9457738748627883


Epoch 6:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 6
Training loss: 0.08784642187108145
Validation loss: 0.27117441919358576
F1 Score (Weighted): 0.9441007346735988
F1 Score (macro): 0.8300980671571656
F1 Score (micro): 0.9427003293084523


Epoch 7:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 7
Training loss: 0.07497347991921631
Validation loss: 0.2867763975759245
F1 Score (Weighted): 0.9435251545228501
F1 Score (macro): 0.8270204303910252
F1 Score (micro): 0.9429198682766191


Epoch 8:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 8
Training loss: 0.059454419009661516
Validation loss: 0.3081366915598393
F1 Score (Weighted): 0.9431883301098591
F1 Score (macro): 0.829713536625443
F1 Score (micro): 0.9422612513721186


Epoch 9:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 9
Training loss: 0.051125568383182905
Validation loss: 0.32973950718848133
F1 Score (Weighted): 0.9424195625326278
F1 Score (macro): 0.8265623400765055
F1 Score (micro): 0.9413830954994512


Epoch 10:   0%|          | 0/1139 [00:00<?, ?it/s]


Epoch 10
Training loss: 0.0466928171837833
Validation loss: 0.3345609385757901
F1 Score (Weighted): 0.9431699711118243
F1 Score (macro): 0.8275231482644173
F1 Score (micro): 0.9424807903402854
Prediction accuracy for individual class:
Class: negative
Accuracy: 102/172,  0.593 

Class: neutral
Accuracy: 2016/2128,  0.947 

Class: positive
Accuracy: 2175/2255,  0.965 

best epoch for the best F1 score (macro):  4


In [None]:
# Assemble model_df, the table of performance metrics of the trained BERT models,
# from the file paths recorded so far.
model_df = prepare_model_df(eval_df_path_list, accuracy_per_class_path_list, model_path_list)

# Width, height and domain used for the BERT model evaluation graphs.
width = 200
height = 200
domain = (0, 1)

# Long-form frame of the F1 scores (macro, micro and weighted) and the train /
# validation losses.
df_f1_scores_long = prepare_data_for_metrics_graphs(eval_df, best_model=False)

# Predict emotions for the sample customer reviews.
predict_emotion_test()

# Build the two evaluation charts:
#   1. F1_scores_graph: F1 scores (macro, micro and weighted), train and validation losses.
#   2. prediction_accuracy_graph: prediction accuracy of the individual classes.
F1_scores_graph, prediction_accuracy_graph = prepare_metrics_graphs(
    df_f1_scores_long, accuracy_per_class_df, domain, width, height, epochs
)
print("")
F1_scores_graph

 
Predict emotion for 'Sản phẩm rất tốt, và mạnh mẽ':  positive
Predict emotion for 'Tôi cần ai đó hỗ trợ tôi cách sử dụng':  neutral
Predict emotion for 'giao hàng quá chậm':  negative



In [None]:
# Display the per-class prediction-accuracy chart returned by prepare_metrics_graphs in the previous cell.
prediction_accuracy_graph

In [None]:
# 'bert-base-uncased': BERT model from the epoch with the best macro F1 score

In [None]:
# Re-create the BERT model with the same checkpoint and dropout settings; its weights
# are replaced by the best-epoch checkpoint loaded below.
model = build_Bert_model(pre_trained_model,attention_probs_dropout_prob,hidden_dropout_prob)

# Move the model to the active device.
model.to(device)

# Path of the checkpoint for the epoch with the highest macro F1 score.
# NOTE(review): relative path; presumably train_model writes one
# 'emotion_{model_type}_NLP_{epoch}.model' file per epoch into the working directory — confirm.
epoch = best_epoch_F1_score_macro
best_model_path = f'emotion_{model_type}_NLP_{epoch}.model'

# Load the best-epoch weights. They are deserialised onto the CPU (map_location) and
# copied into the model's parameters by load_state_dict.
model.load_state_dict(torch.load(best_model_path, map_location=torch.device('cpu')))

# Save the best-epoch weights to Drive.
# NOTE: `epochs` is rebound to the best epoch here; the following cell relies on that
# value for its accuracy CSV file name and for prepare_metrics_graphs.
epochs = best_epoch_F1_score_macro
model_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}.model'
torch.save(model.state_dict(),model_path )

# Record the path of the best-epoch model.
# NOTE(review): not idempotent — re-running this cell appends the path again, and the
# column assignment below raises if the list length no longer matches model_df's row count.
best_model_path_list.append(model_path)

# Store best_model_path_list as the 'best_model_path' column of model_df.
model_df['best_model_path'] = best_model_path_list

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

In [None]:
# Predict emotions for the sample customer reviews with the best-epoch model.
predict_emotion_test()

# Long-form frame of the performance metrics for the best trained BERT model.
df_f1_scores_long = prepare_data_for_metrics_graphs(eval_df, best_model=True)

# Evaluate on the validation dataloader (the first returned value is not needed here)
# and compute the prediction accuracy of each individual class.
_, predictions, true_vals = evaluate(dataloader_validation)
accuracy_per_class_df = accuracy_per_class(predictions, true_vals)

# Save accuracy_per_class_df. `epochs` was rebound to the best epoch in the previous
# cell, so the file name carries that epoch number.
accuracy_per_class_df_path = f'/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_{model_type}_train_data_provided_by_{train_data_provider}_NLP_Epoch{epochs}_accuracy_per_class_df.csv'
accuracy_per_class_df.to_csv(accuracy_per_class_df_path, index=False)

# Build the two evaluation charts:
#   1. F1_scores_graph: F1 scores (macro, micro and weighted), train and validation losses.
#   2. prediction_accuracy_graph: prediction accuracy of the individual classes.
F1_scores_graph, prediction_accuracy_graph = prepare_metrics_graphs(
    df_f1_scores_long, accuracy_per_class_df, domain, width, height, epochs
)
print("")
F1_scores_graph

 
Predict emotion for 'Sản phẩm rất tốt, và mạnh mẽ':  positive
Predict emotion for 'Tôi cần ai đó hỗ trợ tôi cách sử dụng':  neutral
Predict emotion for 'giao hàng quá chậm':  negative
Prediction accuracy for individual class:
Class: negative
Accuracy: 112/172,  0.651 

Class: neutral
Accuracy: 2020/2128,  0.949 

Class: positive
Accuracy: 2166/2255,  0.961 




In [None]:
# Display the per-class prediction-accuracy chart returned by prepare_metrics_graphs in the previous cell.
prediction_accuracy_graph

In [None]:
# Write model_df to Drive. No index argument is passed, so the frame's index is
# written as the first CSV column.
model_df_path = '/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_model_df.csv'
model_df.to_csv(model_df_path)

In [None]:
# Chart width, height and value-axis domain for the graphs defined below.
# `domain` is read as a global by combined_graph when it calls draw_graph.
width, height, domain = 120, 110, (0, 1)

def draw_graph(df, domain, table_id, width, height, first_line_title,
               second_line_title, third_line_title):
    """
    Build the interactive line chart for a single trained model.

    Only the rows of `df` whose 'table_id' equals `table_id` are plotted:
    'epoch' on the x axis, 'value' on the y axis (scaled to `domain`), one
    coloured line per 'line type'. The three title arguments become the
    lines of the chart title; `width` and `height` size the chart.

    Returns the Altair chart.
    """
    table_rows = df[df['table_id'] == table_id]

    # Multi-line title rendered in black.
    title_spec = {
        "text": [first_line_title, second_line_title, third_line_title],
        "color": "black",
        "subtitleColor": "black",
    }

    # Horizontal legend placed under the plot.
    legend_spec = alt.Legend(
        orient='bottom',
        titleFontSize=11,
        titleColor='black',
        labelFontSize=10.5,
        labelColor='black',
        direction='horizontal',
    )

    epoch_lines = (
        alt.Chart(table_rows)
        .mark_line()
        .encode(x='epoch')
        .properties(width=width, height=height, title=title_spec)
    )

    metric_lines = epoch_lines.mark_line().encode(
        y=alt.Y("value:Q", title="", scale=alt.Scale(domain=domain)),
        color=alt.Color('line type:N', legend=legend_spec),
        tooltip=['emotion', 'epoch', 'line type', 'value'],
    )

    return metric_lines.interactive()

def combined_graph(df, width, height, y_domain=None, first_row_count=3):
    """
    Combine the per-model line charts produced by draw_graph into one figure.

    One panel is drawn for every distinct 'table_id' in `df`, in ascending
    table_id order. The first `first_row_count` panels form the first row and
    all remaining panels form a second row; a row with no panels is skipped,
    so the function also works with fewer than four models.

    Parameters
    ----------
    df : DataFrame with at least the columns 'table_id', 'pre_trained_model',
        'emotion' and 'train_data_type', plus whatever draw_graph plots.
    width, height : size passed to every panel (width is also used for the
        heading chart).
    y_domain : optional y-axis domain passed to draw_graph. When None, the
        notebook-level `domain` variable is used, as before.
    first_row_count : positive number of panels placed on the first row
        (default 3).

    Returns
    -------
    The vertically concatenated Altair chart (heading + panel rows).

    Raises
    ------
    ValueError if `df` contains no table_id at all.
    """
    if y_domain is None:
        # Keep the historical behaviour: fall back to the notebook-level `domain`.
        y_domain = domain

    # Iterate over the ids actually present instead of assuming they are 1..n.
    table_ids = sorted(df['table_id'].unique())
    if not table_ids:
        raise ValueError("combined_graph needs at least one table_id in df")

    panels = []
    for table_id in table_ids:
        table_rows = df[df['table_id'] == table_id]
        # Title lines: model family (text before '/'), emotion, and the
        # training-data description (text before the first apostrophe).
        model_type = table_rows['pre_trained_model'].unique()[0].split('/')[0]
        emotion = table_rows['emotion'].unique()[0]
        train_data_type = table_rows['train_data_type'].unique()[0].split("'")[0]
        panels.append(draw_graph(df, y_domain, table_id, width, height,
                                 model_type, emotion, train_data_type))

    title = alt.Chart({"values": [{"text": "Sentiment Analysis Model Evaluation by Pre-trained BERT Model"}]}
                      ).mark_text(
                        size=15, dx=220, dy=0, color="black"
                      ).encode(
                        text='text:N'
                      ).properties(
                        width=width, height=100
                      )

    # Stack heading, first row and (if any) second row vertically.
    layout = title
    for row_panels in (panels[:first_row_count], panels[first_row_count:]):
        if not row_panels:
            continue
        row = row_panels[0]
        for panel in row_panels[1:]:
            row = row | panel
        layout = layout & row

    chart = layout.configure_view(
                stroke=None
            ).configure_concat(
                spacing=20
            ).configure_title(
                fontSize=10.5
            )

    return chart


In [None]:
def prepare_metrics_df(model_df, colname):
    """
    Load the evaluation-metrics CSV of every trained model into one DataFrame.

    Parameters
    ----------
    model_df : DataFrame with one row per trained model.
    colname : name of the column in `model_df` holding the CSV path of each
        model's evaluation metrics.

    Returns
    -------
    DataFrame with the rows of all CSV files stacked in `model_df` order.
    A 'table_id' column is added: 1 for the first file, 2 for the second,
    and so on. The row index of each file is kept as read. An empty
    DataFrame is returned when `model_df` has no rows.
    """
    frames = []
    for table_id, csv_path in enumerate(model_df[colname], start=1):
        table_metrics = pd.read_csv(csv_path)
        table_metrics['table_id'] = table_id
        frames.append(table_metrics)

    if not frames:
        # pd.concat raises on an empty list; keep returning an empty frame instead.
        return pd.DataFrame()

    # Concatenate once instead of re-copying the accumulated frame on every iteration.
    return pd.concat(frames, axis=0)

# Load the table that lists, for each trained model, the path of the file
# holding its evaluation metrics.
model_df = pd.read_csv('/content/drive/MyDrive/Realtime Dreamer/model_info Aug 10 2022.csv')

# Stack the evaluation metrics of all trained models into df_metrics
# (one table_id per model).
df_metrics = prepare_metrics_df(model_df, 'eval_df_path')

# Save df_metrics without the DataFrame index.
df_metrics.to_csv('/content/drive/MyDrive/Realtime Dreamer/sentiment_analysis_df_metrics.csv', index=False)

In [None]:
# Reshape df_metrics to long format: one row per (model, epoch, metric).
# The metric name goes to the 'line type' column and its number to 'value'.
# var_name is passed as a plain string: pandas documents it as a scalar, and
# newer pandas versions reject a list here for frames with ordinary columns.
# NOTE(review): 'Ir' (capital i) must match the CSV header exactly; it looks like
# it may be meant as 'lr' (learning rate) — confirm against the metrics files.
df_metrics_long = pd.melt(
    df_metrics,
    id_vars=['emotion', 'epoch', 'batch_size', 'Ir', 'eps', 'train_data_type', 'note', 'table_id',
             'pre_trained_model', 'hidden_dropout_prob', 'attention_probs_dropout_prob', 'train_data_creator'],
    value_vars=['train_loss', 'val_loss', 'F1_score_weighted', 'F1_score_macro', 'F1_score_micro'],
    var_name='line type',
)

In [None]:
# Save df_metrics_long without the DataFrame index.
# NOTE(review): this writes to a relative path (the current working directory), unlike the
# other saves in this section, which write under /content/drive/MyDrive — confirm that is intended.
df_metrics_long.to_csv('df_metrics_long.csv', index=False)

In [None]:
# Plot graphs for BERT model evaluation metrics: F1 scores (macro, micro, weighted), train and validation losses.
# One panel is drawn per table_id; 180 and 110 are passed as the panel width and height.
combined_graph(df_metrics_long, 180, 110)

In [None]:
# Prepare df_metrics_long_gb with the max macro F1 score of each trained BERT
# model above, so that the graphs can be sorted by that score.
df_metrics_long_gb = (
    df_metrics_long[df_metrics_long['line type'] == 'F1_score_macro']
    .groupby(['table_id',
              'pre_trained_model',
              'line type',
              'train_data_creator',
              'train_data_type'])['value']
    .max()
    .dropna()
    .reset_index()
)

# Model family = text before the first '/' of the pre-trained model name.
df_metrics_long_gb['model type'] = df_metrics_long_gb['pre_trained_model'].str.split('/').str[0]

# Best macro F1 first; the row order defines the order of every list below.
df_metrics_long_gb = df_metrics_long_gb.sort_values(by=['value'], ascending=False)

# Labels of the form '<table_id>-<model type>'. Built column-wise so that no loop
# variables named `id` / `type` overwrite the Python builtins in the notebook namespace.
tableid_modeltype_list = (
    df_metrics_long_gb['table_id'].astype(str) + '-' + df_metrics_long_gb['model type']
).tolist()

print(tableid_modeltype_list)

df_metrics_long_gb['table_id_model_type'] = tableid_modeltype_list

# Table ids in the same (best-first) order, as plain Python ints.
sorted_table_id = df_metrics_long_gb['table_id'].tolist()
print(sorted_table_id)

# Same labels under the column name used by the plotting functions.
df_metrics_long_gb['tableId_modelType'] = tableid_modeltype_list

['2-NlpHUST', '3-bert-base-uncased', '1-trituenhantaoio', '4-trituenhantaoio', '6-bert-base-uncased', '5-NlpHUST']
[2, 3, 1, 4, 6, 5]


In [None]:
def prepare_metrics(df, df_long_gb, metric_col1, metric_col2):
    """
    Look up, for every model, the value of one metric at its best epoch.

    For each row of `df_long_gb` (columns 'table_id' and 'value'), the rows of
    `df` with the same 'table_id' and with `metric_col1` equal to 'value' are
    selected, and the first `metric_col2` value among them is collected.

    Parameters
    ----------
    df : DataFrame with 'table_id', `metric_col1` and `metric_col2` columns.
    df_long_gb : DataFrame with 'table_id' and 'value' columns; its row order
        defines the order of the result.
    metric_col1 : column of `df` compared (exact equality) with 'value'.
    metric_col2 : column of `df` whose value is returned.

    Returns
    -------
    list with one `metric_col2` value per row of `df_long_gb`.

    Raises
    ------
    IndexError if no row of `df` matches a (table_id, value) pair.
    """
    metrics_list = []
    # Read table ids from df_long_gb itself rather than from the notebook-level
    # `sorted_table_id` list, so ids and values are always paired row by row.
    for tbl_id, best_value in zip(df_long_gb['table_id'], df_long_gb['value']):
        matches = df.loc[(df['table_id'] == tbl_id) & (df[metric_col1] == best_value), metric_col2]
        if matches.empty:
            raise IndexError(
                f"no row with table_id={tbl_id!r} and {metric_col1}={best_value!r}"
            )
        metrics_list.append(matches.iloc[0])
    return metrics_list

# For every model, fetch the other metrics from the df_metrics row whose
# F1_score_macro equals the model's maximum stored in df_metrics_long_gb['value'].
F1_score_weighted_list = prepare_metrics(df_metrics, df_metrics_long_gb, 'F1_score_macro','F1_score_weighted')
F1_score_micro_list = prepare_metrics(df_metrics, df_metrics_long_gb,'F1_score_macro','F1_score_micro')
train_loss_list = prepare_metrics(df_metrics, df_metrics_long_gb,'F1_score_macro','train_loss')
val_loss_list = prepare_metrics(df_metrics, df_metrics_long_gb,'F1_score_macro','val_loss')

# Turn df_metrics_long_gb into a one-row-per-model summary: 'value' becomes
# 'F1_score_macro', 'line type' is dropped and the looked-up metrics are added.
# NOTE(review): the frame is changed in place and 'value' is renamed, while prepare_metrics
# reads df_long_gb['value']; re-running this cell without first re-running the cell that
# builds df_metrics_long_gb will therefore no longer find that column.
df_metrics_long_gb.rename(columns={'value':'F1_score_macro'}, inplace=True)
df_metrics_long_gb.drop(['line type'], axis=1, inplace=True)
df_metrics_long_gb['F1_score_micro'] = F1_score_micro_list
df_metrics_long_gb['F1_score_weighted'] = F1_score_weighted_list
df_metrics_long_gb['train_loss'] = train_loss_list
df_metrics_long_gb['val_loss'] = val_loss_list

In [None]:
def plot_metrics(df, score_column):
    """
    Plot one bar chart of a single metric across the trained BERT models.

    Parameters
    ----------
    df : DataFrame with one row per model and the columns 'tableId_modelType',
        'train_data_type', 'train_data_creator', 'pre_trained_model' and
        `score_column`. The bars are shown in the row order of `df`.
    score_column : name of the metric column to plot; also used as the title.

    Returns
    -------
    The Altair bar chart.
    """
    # An explicit sort list must contain values of the x field itself. The
    # labels are taken from `df` in row order (duplicates removed) instead of
    # the integer table ids of the notebook-level `sorted_table_id`, which
    # never match the 'tableId_modelType' strings.
    label_order = list(dict.fromkeys(df['tableId_modelType']))

    graph = alt.Chart(df).mark_bar(size=10).encode(
        x = alt.X('tableId_modelType:N', sort=label_order),
        y = alt.Y(score_column, title='', scale=alt.Scale(domain=(0,1))),
        color = alt.Color('train_data_type:N',
                          scale=alt.Scale(scheme='redyellowgreen'),
                          legend=alt.Legend(orient='bottom',
                                            titleFontSize=11,
                                            titleColor='black',
                                            labelFontSize=10.5,
                                            labelColor='black',
                                            direction='horizontal')),
        tooltip = ['train_data_creator',
                   'pre_trained_model',
                   score_column]
        ).interactive(
        ).properties(width=140,
                     height=150,
                     title=score_column)
    return graph

def combined_metrics_graph(df_metrics_long_gb):
    """
    Assemble the per-metric bar charts of the trained BERT models into one figure.

    A heading chart is stacked on top of a single row holding one plot_metrics
    bar chart per metric, in this order: F1_score_macro, F1_score_weighted,
    F1_score_micro, val_loss, train_loss.

    Returns the combined Altair chart.
    """
    heading = (
        alt.Chart({"values": [{"text": "Sentiment Analysis Model Evaluation by Metrics"}]})
        .mark_text(size=15, dx=350, dy=0, color="black")
        .encode(text='text:N')
        .properties(width=110, height=150)
    )

    metric_columns = ('F1_score_macro', 'F1_score_weighted', 'F1_score_micro',
                      'val_loss', 'train_loss')
    panels = [plot_metrics(df_metrics_long_gb, column) for column in metric_columns]

    # Join the panels left to right into one row.
    bar_row = panels[0]
    for panel in panels[1:]:
        bar_row = bar_row | panel

    stacked = heading & bar_row
    return (
        stacked
        .configure_view(stroke=None)
        .configure_concat(spacing=15)
        .configure_title(fontSize=12)
    )

# Display the per-metric bar charts for all trained models.
combined_metrics_graph(df_metrics_long_gb)

In [None]:
# Import dataset for prediction accuracy of individual classes of trained BERT models,
# then preview its first rows.
# NOTE(review): the preview shows an 'Unnamed: 0' column, which suggests the CSV was
# saved together with its index — confirm whether that column is wanted.
acc_per_class_combined_df = pd.read_csv(f'/content/drive/MyDrive/Realtime Dreamer/acc_per_class_combined_df Aug 11 2022.csv')
acc_per_class_combined_df.head()

Unnamed: 0,class,score,pre_trained_model,epoch,train_data_creator,table_id,train_data_type,tableId_modelType
0,negative,1.0,trituenhantaoio,9,Yunhong He,1,Yunhong Keyword + Oversample,1-trituenhantaoio
1,neutral,0.962963,trituenhantaoio,9,Yunhong He,1,Yunhong Keyword + Oversample,1-trituenhantaoio
2,positive,0.994307,trituenhantaoio,9,Yunhong He,1,Yunhong Keyword + Oversample,1-trituenhantaoio
3,negative,1.0,NlpHUST,2,Yunhong He,2,Yunhong Keyword + Oversample,2-NlpHUST
4,neutral,0.962963,NlpHUST,2,Yunhong He,2,Yunhong Keyword + Oversample,2-NlpHUST


In [None]:
def plot_accuracy_scores(acc_per_class_combined_df, sort_class='negative'):
    """
    Plot the prediction accuracy of individual classes for the trained BERT models.

    One small bar chart (facet column) is drawn per model, with one bar per
    class. The facet columns are ordered by the models' score on `sort_class`,
    highest first.

    Parameters
    ----------
    acc_per_class_combined_df : DataFrame with the columns 'table_id',
        'pre_trained_model', 'class', 'score', 'train_data_type',
        'train_data_creator' and 'epoch'. It is not modified.
    sort_class : value of 'class' whose score decides the order of the facet
        columns (default 'negative').

    Returns
    -------
    The Altair chart (heading stacked on top of the faceted bar charts).
    """
    # Work on a copy so the caller's DataFrame is left untouched.
    plot_df = acc_per_class_combined_df.copy()

    # Facet label '<table_id>-<pre_trained_model>', built column-wise so that
    # no loop variables named `id` / `type` hide the Python builtins.
    plot_df['tableId_modelType'] = plot_df['table_id'].astype(str) + '-' + plot_df['pre_trained_model']

    facet_order = list(
        plot_df[plot_df['class'] == sort_class]
        .sort_values(by=['score'], ascending=[False])['tableId_modelType']
    )
    plot_df = plot_df.sort_values(by=['class', 'score'], ascending=[True, False])

    title = alt.Chart({"values": [{"text": "Sentiment Analysis Model Evaluation by Individual Class Prediction Accuracy"}]}
                      ).mark_text(size=15, dx=250, dy=0, color="black"
                      ).encode(text='text:N'
                      ).properties(width=110, height=150)

    graph = alt.Chart(plot_df).mark_bar(size=10).encode(
            x = alt.X('class:N', title=""),
            y = alt.Y('score:Q'),
            color = alt.Color('train_data_type:N', scale=alt.Scale(scheme='redyellowgreen'),
                              legend=alt.Legend(
                              orient='bottom',
                              titleFontSize=11,
                              titleColor='black',
                              labelFontSize=10.5,
                              labelColor='black',
                              direction='horizontal')),
            column=alt.Column('tableId_modelType', title='', sort=facet_order),
            tooltip=['score', 'train_data_creator', 'epoch', 'pre_trained_model']
        ).properties(width=85, height=100)

    chart = (title & graph
            ).configure_view(stroke=None
            ).configure_concat(spacing=15
            ).configure_title(fontSize=12)
    return chart

# Display the per-class prediction accuracy charts for all trained models.
plot_accuracy_scores(acc_per_class_combined_df)