In [1]:
# Enable IPython's autoreload extension for this kernel.
%load_ext autoreload

# Mode 2: re-import all loaded modules before each cell executes, so edits to
# the project modules imported below are picked up without a kernel restart.
%autoreload 2

In [2]:
import os

import pandas as pd
from sklearn.model_selection import train_test_split

from src.core import file_manager as fm
from src.nlu_builder.scripts import setup_data

In [None]:
# Experiment selection: the actor whose utterances are used and the filtered
# data variation; together they identify the output sub-directory of this run.
actor = 'patient'
variation = 'without_others_intent/k100_without_sentences_higher_than_median'

# Resolve the run's working directory through the project's file manager.
output_subdir = f'output/{actor}/{variation}'
work_dir = fm.filename_from_data_dir(output_subdir)

# Sentences listed in this CSV are filtered out (by their `txt` column) when
# create_train_test_data builds the train/test splits.
sentences_to_ignore = pd.read_csv(f'{work_dir}/intersection_300_sentences.csv')

In [4]:
# Labelled version of the intersection sentences (presumably the held-out
# validation set -- confirm). The path is built from `actor` so it stays in
# sync with `work_dir` if the actor is ever changed.
file = fm.filename_from_data_dir(f'output/{actor}/{variation}/intersection_300_sentences_with_label.csv')

# Load the labelled sentences and keep every intent except 'others'.
data_to_valid = pd.read_csv(file).loc[lambda frame: frame.intent != 'others']

# Export the filtered sentences as an NLU YAML file next to the other run outputs.
setup_data.generate_nlu_file_from_df(data_to_valid, f'{work_dir}/intersection_300_sentences_with_label.yml')

# Kept as the cell's last expression so the per-intent counts are actually
# rendered; as a mid-cell statement the result was silently discarded.
data_to_valid.intent.value_counts()

generating /home/valmir/dev/python/intent_classifier/data/output/patient/without_others_intent/k100_without_sentences_higher_than_median/intersection_300_sentences_with_label.yml
getting intents...
The intent: greeting, has 58 examples
The intent: inform_medicine, has 14 examples
The intent: inform_symptoms, has 182 examples
The intent: request_inform, has 43 examples
the content was saved in: /home/valmir/dev/python/intent_classifier/data/output/patient/without_others_intent/k100_without_sentences_higher_than_median/intersection_300_sentences_with_label.yml


In [3]:
def create_train_test_data(embedding_name, test_size=0.3, random_state=42):
  """Build and export the train/test split for one embedding.

  Loads the annotated dataframe for `embedding_name`, removes every row whose
  `txt` value appears in the notebook-level `sentences_to_ignore` frame, splits
  the remainder with `train_test_split`, and writes both parts as CSV and as
  NLU YAML files under `{work_dir}/{embedding_name}`.

  Relies on the notebook globals `work_dir` and `sentences_to_ignore`.

  Args:
    embedding_name: Name of the embedding; passed to
      `fm.read_annotated_df_with_embeddings` and used as the output
      sub-directory name.
    test_size: Forwarded to `train_test_split` (default 0.3, the value that
      was previously hard-coded).
    random_state: Seed forwarded to `train_test_split` (default 42, the value
      that was previously hard-coded) so the split is reproducible.

  Returns:
    A pair `(train_shape, test_shape)` with the shapes of the `txt` column of
    the train and test frames, e.g. `((5975,), (2562,))`.
  """
  embedding_dir = f'{work_dir}/{embedding_name}'
  # DataFrame.to_csv does not create missing parent directories, so make sure
  # the per-embedding output folder exists before writing into it.
  os.makedirs(embedding_dir, exist_ok=True)

  df = fm.read_annotated_df_with_embeddings(embedding_name)

  # Exclude the sentences reserved in `sentences_to_ignore` from both splits.
  df_to_use = df.loc[~df['txt'].isin(sentences_to_ignore['txt'])]
  df_train, df_test = train_test_split(
    df_to_use, test_size=test_size, random_state=random_state
  )

  df_train.to_csv(f'{embedding_dir}/training_data.csv', index=False)
  df_test.to_csv(f'{embedding_dir}/test_data.csv', index=False)

  setup_data.generate_nlu_file_from_df(df_train, f'{embedding_dir}/training_data.yml')
  setup_data.generate_nlu_file_from_df(df_test, f'{embedding_dir}/test_data.yml')

  return df_train.txt.shape, df_test.txt.shape

In [5]:
# Generate the train/test CSV and NLU YAML files for the 'bert_pt' embeddings;
# the returned pair of `txt` column shapes is shown as the cell output.
create_train_test_data('bert_pt')

generating /home/valmir/dev/python/intent_classifier/data/output/patient/without_others_intent/k100_without_sentences_higher_than_median/bert_pt/training_data.yml
getting intents...
The intent: greeting, has 785 examples
The intent: inform_medicine, has 623 examples
The intent: inform_symptoms, has 3699 examples
The intent: request_inform, has 868 examples
the content was saved in: /home/valmir/dev/python/intent_classifier/data/output/patient/without_others_intent/k100_without_sentences_higher_than_median/bert_pt/training_data.yml
generating /home/valmir/dev/python/intent_classifier/data/output/patient/without_others_intent/k100_without_sentences_higher_than_median/bert_pt/test_data.yml
getting intents...
The intent: greeting, has 332 examples
The intent: inform_medicine, has 255 examples
The intent: inform_symptoms, has 1599 examples
The intent: request_inform, has 376 examples
the content was saved in: /home/valmir/dev/python/intent_classifier/data/output/patient/without_others_inten

((5975,), (2562,))

In [6]:
# Generate the train/test CSV and NLU YAML files for the 'flair_pt' embeddings;
# the returned pair of `txt` column shapes is shown as the cell output.
create_train_test_data('flair_pt')

generating /home/valmir/dev/python/intent_classifier/data/output/patient/without_others_intent/k100_without_sentences_higher_than_median/flair_pt/training_data.yml
getting intents...
The intent: greeting, has 685 examples
The intent: inform_medicine, has 293 examples
The intent: inform_symptoms, has 4232 examples
The intent: request_inform, has 768 examples
the content was saved in: /home/valmir/dev/python/intent_classifier/data/output/patient/without_others_intent/k100_without_sentences_higher_than_median/flair_pt/training_data.yml
generating /home/valmir/dev/python/intent_classifier/data/output/patient/without_others_intent/k100_without_sentences_higher_than_median/flair_pt/test_data.yml
getting intents...
The intent: greeting, has 297 examples
The intent: inform_medicine, has 123 examples
The intent: inform_symptoms, has 1800 examples
The intent: request_inform, has 342 examples
the content was saved in: /home/valmir/dev/python/intent_classifier/data/output/patient/without_others_in

((5978,), (2562,))

In [7]:
# Generate the train/test CSV and NLU YAML files for the 'glove' embeddings;
# the returned pair of `txt` column shapes is shown as the cell output.
create_train_test_data('glove')

generating /home/valmir/dev/python/intent_classifier/data/output/patient/without_others_intent/k100_without_sentences_higher_than_median/glove/training_data.yml
getting intents...
The intent: greeting, has 621 examples
The intent: inform_medicine, has 230 examples
The intent: inform_symptoms, has 4839 examples
The intent: request_inform, has 330 examples
the content was saved in: /home/valmir/dev/python/intent_classifier/data/output/patient/without_others_intent/k100_without_sentences_higher_than_median/glove/training_data.yml
generating /home/valmir/dev/python/intent_classifier/data/output/patient/without_others_intent/k100_without_sentences_higher_than_median/glove/test_data.yml
getting intents...
The intent: greeting, has 272 examples
The intent: inform_medicine, has 100 examples
The intent: inform_symptoms, has 2062 examples
The intent: request_inform, has 146 examples
the content was saved in: /home/valmir/dev/python/intent_classifier/data/output/patient/without_others_intent/k100

((6020,), (2580,))

In [8]:
# Generate the train/test CSV and NLU YAML files for the 'lasbe' embeddings;
# the returned pair of `txt` column shapes is shown as the cell output.
# NOTE(review): 'lasbe' may be a misspelling of "LaBSE"; it is left unchanged
# because it must match the embedding name and directory used by the project.
create_train_test_data('lasbe')

generating /home/valmir/dev/python/intent_classifier/data/output/patient/without_others_intent/k100_without_sentences_higher_than_median/lasbe/training_data.yml
getting intents...
The intent: greeting, has 677 examples
The intent: inform_medicine, has 664 examples
The intent: inform_symptoms, has 3138 examples
The intent: request_inform, has 915 examples
the content was saved in: /home/valmir/dev/python/intent_classifier/data/output/patient/without_others_intent/k100_without_sentences_higher_than_median/lasbe/training_data.yml
generating /home/valmir/dev/python/intent_classifier/data/output/patient/without_others_intent/k100_without_sentences_higher_than_median/lasbe/test_data.yml
getting intents...
The intent: greeting, has 277 examples
The intent: inform_medicine, has 317 examples
The intent: inform_symptoms, has 1351 examples
The intent: request_inform, has 368 examples
the content was saved in: /home/valmir/dev/python/intent_classifier/data/output/patient/without_others_intent/k100

((5394,), (2313,))

In [9]:
# Generate the train/test CSV and NLU YAML files for the 'use' embeddings;
# the returned pair of `txt` column shapes is shown as the cell output.
create_train_test_data('use')

generating /home/valmir/dev/python/intent_classifier/data/output/patient/without_others_intent/k100_without_sentences_higher_than_median/use/training_data.yml
getting intents...
The intent: greeting, has 462 examples
The intent: inform_medicine, has 646 examples
The intent: inform_symptoms, has 2771 examples
The intent: request_inform, has 760 examples
the content was saved in: /home/valmir/dev/python/intent_classifier/data/output/patient/without_others_intent/k100_without_sentences_higher_than_median/use/training_data.yml
generating /home/valmir/dev/python/intent_classifier/data/output/patient/without_others_intent/k100_without_sentences_higher_than_median/use/test_data.yml
getting intents...
The intent: greeting, has 206 examples
The intent: inform_medicine, has 275 examples
The intent: inform_symptoms, has 1149 examples
The intent: request_inform, has 359 examples
the content was saved in: /home/valmir/dev/python/intent_classifier/data/output/patient/without_others_intent/k100_witho

((4639,), (1989,))