In [9]:
import json
import plotly.graph_objects as go
import pandas as pd
from sklearn.metrics import matthews_corrcoef

from src.core import file_manager as fm
from src.embeddings.constants import EMBEDDING_MODELS_TRANSLATION

In [2]:
# Maps each sentence-selection variation (also the name of its sub-directory
# under the models directory read by `get_results`) to the Portuguese label
# used as the result key and as the series name in the charts:
# "All sentences", "Without outliers", "Distance up to the median".
VARIATIONS_TRANSLATE = {
    'k100': 'Todas as sentenças',
    'k100_without_outliers': 'Sem outliers',
    'k100_without_sentences_higher_than_median': 'Distância até a Mediana'
}


def get_results(base_dir):
    """Collect the accuracy of every (variation, embedding) model under ``base_dir``.

    For each variation in ``VARIATIONS_TRANSLATE`` and each embedding in
    ``EMBEDDING_MODELS_TRANSLATION`` the file
    ``{base_dir}/{variation}/{embedding}/results/intent_report.json`` is read
    and its top-level ``'accuracy'`` entry is stored.

    Args:
        base_dir: Directory that contains one sub-directory per variation.

    Returns:
        A nested dict ``{variation_label: {embedding_label: accuracy}}`` keyed
        by the translated (display) names of the variation and the embedding.

    Raises:
        OSError: If a report file cannot be opened (e.g. it does not exist).
        KeyError: If a report has no ``'accuracy'`` entry.
    """
    results = {}

    for variation, variation_translated in VARIATIONS_TRANSLATE.items():
        results[variation_translated] = {}

        for embedding, embedding_translated in EMBEDDING_MODELS_TRANSLATION.items():
            filename = f'{base_dir}/{variation}/{embedding}/results/intent_report.json'

            # Use a context manager so the handle is closed right away instead
            # of leaking one open file per (variation, embedding) pair.
            with open(filename) as report_file:
                intent_report = json.load(report_file)

            results[variation_translated][embedding_translated] = intent_report['accuracy']

    return results


def plot_chart(results, colors=('DarkGoldenRod', 'DarkOliveGreen', 'DarkSlateBlue')):
    """Show a grouped bar chart with one bar series per variation.

    Args:
        results: Nested dict ``{variation_label: {embedding_label: score}}``.
            The inner keys become the x-axis categories and the inner values
            the bar heights.
        colors: Marker colours assigned to the variations in order. The
            palette is cycled when there are more variations than colours, so
            no variation is silently left out of the chart.

    The y-axis is fixed to the range [0, 1] and the figure is displayed with
    ``fig.show()``; nothing is returned.
    """
    from itertools import cycle

    data = [
        go.Bar(
            name=variation,
            x=list(scores.keys()),
            y=list(scores.values()),
            marker_color=color
        )

        # cycle() keeps zip() from truncating `results` to len(colors) entries.
        for (variation, scores), color in zip(results.items(), cycle(colors))
    ]

    fig = go.Figure(data=data, layout_yaxis_range=[0, 1])

    # Change the bar mode
    fig.update_layout(barmode='group', xaxis_tickangle=-30)
    fig.show()


def run_pipeline(path):
    """Read the accuracy reports stored under ``path`` and plot them as a grouped bar chart."""
    plot_chart(get_results(path))


In [3]:
# Accuracies of the models stored under 'nlu_models/patient'.
results = get_results(fm.filename_from_data_dir('nlu_models/patient'))

# Accuracies of the models stored under 'nlu_models/patient/without_others_intent'.
results_without_others_intent = get_results(fm.filename_from_data_dir('nlu_models/patient/without_others_intent'))

In [4]:
# Accuracy table for 'nlu_models/patient': one row per embedding, one column per variation.
pd.DataFrame(results)

Unnamed: 0,Todas as sentenças,Sem outliers,Distância até a Mediana
BERTimbau,0.773514,0.790574,0.815966
FLAIR,0.794319,0.793016,0.820443
Glove,0.801919,0.802731,0.836598
LaBSE,0.835181,0.838893,0.895025
MUSE,0.863585,0.878998,0.914925


In [5]:
# Accuracy table for 'nlu_models/patient/without_others_intent': one row per embedding, one column per variation.
pd.DataFrame(results_without_others_intent)

Unnamed: 0,Todas as sentenças,Sem outliers,Distância até a Mediana
BERTimbau,0.89059,0.888889,0.914027
FLAIR,0.908816,0.925235,0.930995
Glove,0.930167,0.93768,0.961049
LaBSE,0.919325,0.925691,0.965044
MUSE,0.945073,0.950379,0.964887


### Results with 20% of the data held out for testing

In [4]:
# NOTE(review): pandas is already imported in the notebook's first cell, so
# this re-import is redundant.
import pandas as pd

# Accuracy table built from `results`: one row per embedding, one column per variation.
pd.DataFrame(results)

Unnamed: 0,Todas as sentenças,Sem outliers,Distância até a Mediana
BERTimbau,0.7849,0.796314,0.813875
FLAIR,0.804523,0.795429,0.832152
Glove,0.798542,0.817856,0.831852
LaBSE,0.83461,0.851592,0.891791
MUSE,0.874416,0.880453,0.91791


In [5]:
# Accuracy table built from `results_without_others_intent`: one row per embedding, one column per variation.
pd.DataFrame(results_without_others_intent)

Unnamed: 0,Todas as sentenças,Sem outliers,Distância até a Mediana
BERTimbau,0.89229,0.877239,0.927036
FLAIR,0.919286,0.917555,0.927602
Glove,0.934948,0.945182,0.960674
LaBSE,0.917761,0.920836,0.961923
MUSE,0.94328,0.943182,0.971861


In [6]:
# Re-read the reports under 'nlu_models/patient' and show the grouped accuracy bar chart.
run_pipeline(fm.filename_from_data_dir('nlu_models/patient'))

In [7]:
# Re-read the reports under 'nlu_models/patient/without_others_intent' and show the grouped accuracy bar chart.
run_pipeline(fm.filename_from_data_dir('nlu_models/patient/without_others_intent'))

In [8]:
import copy

# Work on a deep copy so the original `results` dict is left untouched.
results_merged = copy.deepcopy(results)

# Add the "without others intent" accuracies next to the regular ones, under
# the embedding name suffixed with ' ~ (others)', then plot both together.
for variation, embedding_scores in results_without_others_intent.items():
    for embedding_key, accuracy in embedding_scores.items():
        new_key = f'{embedding_key} ~ (others)'
        results_merged[variation][new_key] = accuracy

plot_chart(results_merged)

### Matthews correlation coefficient:

In [16]:
# Integer class index for each intent label; `print_matthews_corrcoef` uses it
# to encode the true and predicted intents. There is no entry for 'others':
# rows whose original intent is 'others' are filtered out before encoding.
intent_indexes_dict = {
  'greeting': 0,
  'inform_medicine': 1,
  'inform_symptoms': 2,
  'request_inform': 3,
}

In [7]:
# Directory with one sub-directory per embedding model; `print_matthews_corrcoef`
# reads its prediction CSV files from here (as a notebook-level global).
base_dir = fm.filename_from_data_dir(
  'output/patient/without_others_intent/k100_without_sentences_higher_than_median'
)

In [45]:
def print_matthews_corrcoef(csv_file, models_dir=None):
    """Print the Matthews correlation coefficient of every embedding model.

    For each model name in ``EMBEDDING_MODELS_TRANSLATION`` the file
    ``{models_dir}/{model_name}/{csv_file}.csv`` is read, rows whose
    ``original_intent`` is ``'others'`` are dropped, and the coefficient between
    the ``original_intent`` and ``intent_predicted`` columns is printed.

    Args:
        csv_file: Name of the predictions file, without the ``.csv`` extension.
        models_dir: Directory that contains one sub-directory per model.
            Defaults to the notebook-level ``base_dir`` so existing calls keep
            working unchanged.

    Raises:
        ValueError: If a true or predicted label has no entry in
            ``intent_indexes_dict``.
    """
    if models_dir is None:
        models_dir = base_dir

    for model_name in EMBEDDING_MODELS_TRANSLATION.keys():
        file_name = f'{models_dir}/{model_name}/{csv_file}.csv'
        df = pd.read_csv(file_name)
        df = df[df['original_intent'] != 'others']

        Y_true = df['original_intent'].map(intent_indexes_dict)
        Y_pred = df['intent_predicted'].map(intent_indexes_dict)

        # Series.map turns labels missing from the dict into NaN; report them
        # by name instead of handing NaN values to the metric.
        for column, encoded in (('original_intent', Y_true), ('intent_predicted', Y_pred)):
            unknown_labels = sorted(set(df.loc[encoded.isna(), column].astype(str)))
            if unknown_labels:
                raise ValueError(
                    f'{file_name}: labels in column {column!r} missing from '
                    f'intent_indexes_dict: {unknown_labels}'
                )

        print(f'{model_name}:', matthews_corrcoef(Y_true, Y_pred))
        print('=====================================')

In [46]:

# Matthews correlation coefficient per embedding model, from each model's 'nlu_predictions.csv'.
print_matthews_corrcoef('nlu_predictions')

bert_pt: 0.8453605956101456
flair_pt: 0.8663185924460697
glove: 0.9027838293040308
lasbe: 0.9227734423841301
use: 0.940827585100359


In [47]:
# Matthews correlation coefficient per embedding model, from each model's 'nlu_predictions_of_intersection.csv'.
print_matthews_corrcoef('nlu_predictions_of_intersection')

bert_pt: 0.8853673976673359
flair_pt: 0.8862491783743994
glove: 0.752149604657755
lasbe: 0.940800042934086
use: 0.9288000951722966
