In [3]:
import json
import keras
from keras.preprocessing.sequence import pad_sequences
import numpy as np
import pandas as pd

import sparknlp
from sparknlp.annotator import Tokenizer as SparkTokenizer
from sparknlp.base import DocumentAssembler

from src.core import file_manager as fm

In [4]:
# Start the Spark NLP session. The resulting module-level `spark` object is the one
# Predictor.get_tokens uses to build its DataFrame, so this cell must run first.
# NOTE(review): `spark32=True` presumably selects a Spark 3.2-compatible build —
# confirm against the installed sparknlp version.
spark = sparknlp.start(spark32=True)

In [5]:
class Predictor:
  """Load a saved LSTM intent classifier and print its prediction for raw text.

  All artifacts are read from
  ``output/lstm_models/patient/<embedding_name>`` (resolved through
  ``fm.filename_from_data_dir``):

  * ``model.h5``         - the Keras model,
  * ``vocabullary.json`` - mapping from token to integer index,
  * ``metadata.json``    - must contain the keys ``intents`` and ``vector_length``.
  """

  def __init__(self, embedding_name):
    """Load model, vocabulary and metadata for the given embedding name.

    Args:
      embedding_name: name of the sub-directory holding the saved artifacts
        (e.g. ``'glove'`` or ``'bert_pt'``).
    """
    self.embedding_name = embedding_name
    self.working_dir = fm.filename_from_data_dir(f'output/lstm_models/patient/{self.embedding_name}')
    self.model = self.load_lstm_model()
    self.vocabullary = self.load_vocabullary()
    self.metadata = self.load_metadata()
    self.intents = self.metadata['intents']
    self.vector_length = int(self.metadata['vector_length'])

  def _load_json(self, filename):
    """Parse ``<working_dir>/<filename>`` as JSON, closing the file afterwards."""
    # The context manager guarantees the handle is released even if parsing fails.
    with open(f'{self.working_dir}/{filename}', "r") as file:
      return json.load(file)

  def load_lstm_model(self):
    """Return the Keras model stored in ``model.h5`` inside the working directory."""
    return keras.models.load_model(f'{self.working_dir}/model.h5')

  def load_vocabullary(self):
    """Return the parsed content of ``vocabullary.json`` (token -> index mapping)."""
    return self._load_json('vocabullary.json')

  def load_metadata(self):
    """Return the parsed content of ``metadata.json``."""
    return self._load_json('metadata.json')

  def get_tokens(self, text):
    """Tokenize ``text`` with the Spark NLP tokenizer.

    Relies on the module-level ``spark`` session.

    Args:
      text: the raw sentence to tokenize.

    Returns:
      The ``result`` field of the ``token`` column for the single input row
      (presumably a list of token strings - confirm against Spark NLP output).
    """
    spark_df = spark.createDataFrame([[text]]).toDF("text")

    doc_df = DocumentAssembler().setInputCol("text").setOutputCol("document").transform(spark_df)

    token_df = SparkTokenizer().setInputCols(["document"]).setOutputCol("token").fit(doc_df).transform(doc_df)

    return token_df.select('token.result').toPandas().loc[0]['result']

  def convert_text_to_input_model(self, tokens):
    """Map each token to its vocabulary index; unknown tokens become 0.

    Args:
      tokens: iterable of token strings.

    Returns:
      A list of indices, one per token, in the same order.
    """
    return [self.vocabullary.get(token, 0) for token in tokens]

  def print_intent_probabilities(self, prediction):
    """Print every intent with its probability, highest first.

    Args:
      prediction: indexable whose first row holds one score per entry of
        ``self.intents``, in the same order.
    """
    print('This is the distribuition of the prediction by intent:')
    intent_probabilities = {intent: prediction[0][index] for index, intent in enumerate(self.intents)}

    # Negated score as key => descending order; ties keep the order of self.intents.
    for intent, probability in sorted(intent_probabilities.items(), key=lambda item: -item[1]):
      print(f'{intent} = {(probability *100):.4f}%')

  def print_pred_real_label(self, text, print_probabilities=False):
    """Predict the intent of ``text`` and print it with its probability.

    Args:
      text: the raw sentence to classify.
      print_probabilities: when True, also print the score of every intent.
    """
    tokens = self.get_tokens(text)

    indexes_vector = self.convert_text_to_input_model(tokens)

    # Pad (or truncate) at the end so the vector has exactly vector_length entries.
    padding_vector = pad_sequences(
      maxlen=self.vector_length, sequences=[indexes_vector], value=0, padding='post', truncating='post'
    )

    prediction = self.model.predict(padding_vector)

    # Exactly one sequence is submitted, so only the first row is meaningful.
    probabilities = prediction[0]
    index_label_predicted = np.argmax(probabilities)

    predicted_label = self.intents[index_label_predicted]
    percentage = f'{(probabilities[index_label_predicted] *100):.4f}'

    print(f'The intent predicted was: {predicted_label} with: {percentage}%\n')

    if print_probabilities:
      self.print_intent_probabilities(prediction)

### Using GloVe predictor

In [7]:
# Build a predictor from the model, vocabulary and metadata saved under the
# 'glove' model directory.
predictor_glove = Predictor('glove')

In [17]:
# Medicine statements: "I am taking paracetamol" / "I am taking Tylenol".
predictor_glove.print_pred_real_label('Estou tomando paracetamol')
predictor_glove.print_pred_real_label('Estou tomando Tylenol')

The intent predicted was: inform_medicine with: 99.1653%

The intent predicted was: inform_medicine with: 86.7540%



In [10]:
# Symptom statement: "I have a fever and a headache".
predictor_glove.print_pred_real_label('Eu estou com febre e dor de cabeça')

The intent predicted was: inform_symptoms with: 99.9562%



In [11]:
# Greeting / thanks: "Hello, good morning, thank you very much!".
predictor_glove.print_pred_real_label('Olá bom dia, muito obrigado!')

The intent predicted was: greeting with: 99.9407%



In [12]:
# Question: "I would like to know which medicine I should take for severe chest pain?".
predictor_glove.print_pred_real_label('Gostaria de saber qual remédio, eu devo tomar para fortes dores no peito?')

The intent predicted was: request_inform with: 81.4230%



In [13]:
# Two medicines in one sentence: "I am taking Tylenol and paracetamol".
predictor_glove.print_pred_real_label('Estou tomando Tylenol e paracetamol')

The intent predicted was: inform_medicine with: 99.4024%



### Using Bert predictor

In [18]:
# Build a predictor from the model, vocabulary and metadata saved under the
# 'bert_pt' model directory.
predictor_bert_pt = Predictor('bert_pt')

In [19]:
# Medicine statements: "I am taking paracetamol" / "I am taking Tylenol".
predictor_bert_pt.print_pred_real_label('Estou tomando paracetamol')
predictor_bert_pt.print_pred_real_label('Estou tomando Tylenol')

The intent predicted was: inform_symptoms with: 69.7415%

The intent predicted was: inform_symptoms with: 96.5550%



In [10]:
# Symptom statement: "I have a fever and a headache".
predictor_bert_pt.print_pred_real_label('Eu estou com febre e dor de cabeça')

The intent predicted was: inform_symptoms with: 99.9995%



In [11]:
# Greeting / thanks: "Hello, good morning, thank you very much!".
predictor_bert_pt.print_pred_real_label('Olá bom dia, muito obrigado!')

The intent predicted was: greeting with: 99.9946%



In [12]:
# Question: "I would like to know which medicine I should take for severe chest pain?".
predictor_bert_pt.print_pred_real_label('Gostaria de saber qual remédio, eu devo tomar para fortes dores no peito?')

The intent predicted was: request_inform with: 99.9879%



In [13]:
# Two medicines in one sentence: "I am taking Tylenol and paracetamol".
predictor_bert_pt.print_pred_real_label('Estou tomando Tylenol e paracetamol')

The intent predicted was: inform_symptoms with: 75.7815%



In [9]:
# Read the annotated DataFrame for the 'bert_pt' embeddings and display only the
# rows whose `intent` column equals 'inform_medicine'.
# NOTE(review): presumably this is the data the bert_pt model was trained on, inspected
# here to understand the inform_medicine predictions above — confirm.
df = fm.read_annotated_df_with_embeddings('bert_pt')

df[df['intent'] == 'inform_medicine']

Unnamed: 0,txt,annotated_txt,intent,embeddings,tokens,word_embeddings
12,Durante esse período de febre tomei Dipirona m...,Durante esse período de [febre](SINTOMA) tomei...,inform_medicine,"[[0.16708213, -0.18079649, 0.22830229, 0.00992...","[Durante, esse, período, de, febre, tomei, Dip...","[[0.40036896, -0.46102285, 0.5211087, -0.43017..."
14,Febre controlada com Dipirona 1g,[Febre](SINTOMA) controlada com Dipirona 1g,inform_medicine,"[[0.27242714, -0.12426308500000001, 0.32287258...","[Febre, controlada, com, Dipirona, 1g]","[[0.08862357, -0.17657822, 0.06212966, -0.0467..."
15,Não foi feito nenhum exame. O médico passou Di...,Não foi feito nenhum exame. O médico passou Di...,inform_medicine,"[[0.14055879999999998, -0.24052137, 0.11373424...","[Não, foi, feito, nenhum, exame, ., O, médico,...","[[0.46928582, -0.60513246, -0.0158731040000000..."
30,Hoje só tomei o antialérgico Alektos Já vinha...,Hoje só tomei o antialérgico Alektos Já vinha...,inform_medicine,"[[0.2983539, -0.27068886000000003, 0.40885243,...","[Hoje, só, tomei, o, antialérgico, Alektos, Já...","[[0.47601914, -0.6982294, -0.26847202, -0.1179..."
35,Tylenol ou paracetamol,Tylenol ou paracetamol,inform_medicine,"[[0.36587963, -0.062025815000000005, 0.1732032...","[Tylenol, ou, paracetamol]","[[0.50390345, -0.0805911, 0.1411364, 0.1437646..."
...,...,...,...,...,...,...
8796,Fui ao posto e nao tenho nada,Fui ao posto e nao tenho nada,inform_medicine,"[[0.15760449, -0.48902049999999997, -0.1158888...","[Fui, ao, posto, e, nao, tenho, nada]","[[-0.07302006, -0.37603139999999996, -0.113624..."
8801,Não tenho como medir tô sem termometro,Não tenho como medir tô sem termometro,inform_medicine,"[[-0.08875085, -0.31451717, -0.2268491, 0.2497...","[Não, tenho, como, medir, tô, sem, termometro]","[[0.26180577, -0.4894693, 0.14541183, 0.372275..."
8809,Eu não tenho nada além disso,Eu não tenho nada além disso,inform_medicine,"[[0.23736839, -0.33511838, 0.049474504, 0.0154...","[Eu, não, tenho, nada, além, disso]","[[0.41178796, -0.38163227, 0.60377157, 0.05930..."
8814,Nunca tomei nenhum remédio,Nunca tomei nenhum remédio,inform_medicine,"[[0.40804726, -0.27598658, -0.18077752, -0.011...","[Nunca, tomei, nenhum, remédio]","[[0.46963206, -0.7163525, 0.11846268, 0.258692..."
