# Evaluation

## Preliminaries

### Imports

In [None]:
import sys
import os
# Make packages under ./src importable (the programmable_chatbot imports below
# presumably rely on this). The path is relative, so it is resolved against the
# kernel's current working directory.
sys.path.append('./src')

In [None]:
import pandas as pd

In [None]:
from programmable_chatbot.data.corpora import HOPE
from programmable_chatbot.chatbot_api import Chatbot

In [None]:
import numpy as np

In [None]:
import torch

In [None]:
import random

### Constants

In [None]:
# Directory (relative to the working directory) the evaluation samples CSV is written to
DATA_PATH = 'resources/data/cache'

In [None]:
# Both values are passed positionally to the Chatbot constructor below.
# Tokenizer identifier (presumably a pre-trained tokenizer name; confirm against Chatbot)
TOKENIZER = 'gpt2'
# Location of the model checkpoint to load (relative path)
MODEL_PATH = '../experiments/PROGCHAT/gpt2_large_2023_01_06_12_19_33/model/best_checkpoint'

In [None]:
# Column names of the output data frame, in the same order as the fields of
# each row tuple collected during response generation
OUT_DF_COLUMNS = [
    'Split',
    'Corpus',
    'Conversation ID',
    'Turn IDX',
    'Speaker',
    'Context',
    'Last message',
    'Response',
    'Model',
]

In [None]:
# Keyword arguments unpacked into every chatbot.generate call
# (presumably forwarded to the underlying model's sampling routine; confirm against Chatbot)
GENERATE_KWARGS = {
    'top_p': 1.0,
    'top_k': 0,
    'temperature': 0.95,
    'do_sample': True,
    'max_new_tokens': 256,
}

In [None]:
# Number of (dialogue, therapist turn) pairs drawn for the evaluation set;
# each pair yields three rows in the output (ground truth plus two generated responses)
N_SAMPLES = 100

In [None]:
# Seed applied to the `random`, NumPy and PyTorch generators before sampling and generation
RANDOM_SEED = 2307

### Model and tokenizer

In [None]:
# Build the chatbot from the checkpoint path and tokenizer identifier;
# its tokenizer is reused when loading the corpus
chatbot = Chatbot(MODEL_PATH, TOKENIZER)

### Random seed

In [None]:
# Seed every random number generator used in this notebook with the same value
# so that conversation sampling and response sampling can be repeated
for seed_fn in (random.seed, np.random.seed):
    seed_fn(RANDOM_SEED)
torch.manual_seed(RANDOM_SEED)

## Data

### Load data

In [None]:
# Load the HOPE corpus ('test' is presumably the split name; the output rows are
# tagged 'test' as well) using the chatbot's tokenizer, then extract the
# structure used for evaluation.
# NOTE(review): augmentation and dropout are enabled on evaluation data;
# confirm this is intended.
hope = HOPE(
    'resources/data/raw/HOPE_WSDM_2022',
    'test',
    chatbot.tokenizer,
    augmentation=True,
    dropout=True,
    max_chunk_turns=8,
    max_context_turns=3,
    min_turns=3,
    # Reuse the notebook-wide seed instead of repeating the literal, so that
    # changing RANDOM_SEED also changes the loader's seed
    random_seed=RANDOM_SEED
)
data = hope.get_data_for_evaluation()

### Randomly sample conversations

In [None]:
# Draw N_SAMPLES dialogue prefixes, each ending on a turn whose speaker prompt
# starts with 'T' (the turn the model will be asked to generate).
# Dialogues are drawn with replacement, so the same dialogue may appear more than once.
dialogues = data['generator']['conditioned']
# Fail early with a clear message instead of an IndexError from random.choice
# (or an endless redraw loop) when there is nothing valid to sample
if N_SAMPLES > 0 and not any(
        utterance[0].startswith('T') for dialogue in dialogues for utterance in dialogue['utterances']
):
    raise ValueError('No dialogue with a therapist turn is available for sampling')

eval_data = []
while len(eval_data) < N_SAMPLES:
    dialogue = random.choice(dialogues)
    # Indices of the turns that can serve as generation target
    therapist_turns = [
        idx for idx, utterance in enumerate(dialogue['utterances']) if utterance[0].startswith('T')
    ]
    if not therapist_turns:
        # This dialogue has no target turn: draw another one
        continue
    t = random.choice(therapist_turns)
    # Keep the dialogue up to and including the selected target turn
    eval_data.append(
        {'task_description': dialogue['task_description'], 'utterances': dialogue['utterances'][:t + 1]}
    )

## Task description

In [None]:
# Task description used to condition the therapy-specific generation.
# NOTE(review): the wording (including "his/hers" and the spaces before some
# line breaks) is kept verbatim, since the text is fed to the model as is.
task = (
    'The following is a therapy session between an empathetic therapist AI, '
    'called TherapyBot, and a person, called Patient.\n\n'
    'In the following interactions, TherapyBot and Patient '
    'will converse in natural language. '
    'The Patient talks about his/hers issues to TherapyBot '
    'and TherapyBot helps the Patient to explore and solve his/hers problems. '
    'TherapyBot reacts empathetically '
    'giving informative and supportive responses.\n'
    'The conversation is grounded in the persona description of the speakers. \n'
    'A persona description is a short description in a few sentences of '
    'the personal information of one or both speakers.'
)
# Persona description passed as global label alongside the task description
global_label = (
    'Persona description of the speakers: \n'
    'TherapyBot persona: My name is TherapyBot and I am a therapist AI. '
    'I use empathy to connect with my patients. '
    'I want to help my patients feel better. '
    'I like offering support to people.'
)

## Evaluation data preparation

### Response generation

In [None]:
# Accumulator for the output rows: one tuple per (sample, response source),
# with fields in the order given by OUT_DF_COLUMNS
out_data = []

In [None]:
# Start from an empty accumulator so that re-running this cell does not append duplicate rows
out_data = []
# Iterate over the sampled dialogue prefixes.
# Each element of sample['utterances'] is a (speaker prompt, text) pair and the
# last pair is the turn to be generated.
for sample in eval_data:
    history = sample['utterances'][:-1]
    # Text of the turn right before the target one (empty when the target opens the dialogue).
    # The text is element 1 of the pair; element 0 is the speaker prompt.
    # NOTE(review): the turn is always labelled 'Patient:'; confirm that the
    # preceding turn is always a patient turn.
    message = f'Patient:{history[-1][1]}' if history else ''
    # Prepare context (speaker prompt followed by text, for every turn before the target)
    context = [prompt + text for prompt, text in history]
    # Prepare original response
    prompt, text = sample['utterances'][-1]
    original_response = f'Therapist:{text}'
    # Both generated responses come from the same `chatbot` object; only the
    # conditioning text differs.
    # Response conditioned on the task description that comes with the sample
    response_baseline = chatbot.generate(
        context,
        prompt=prompt,
        task_description=sample['task_description'],
        **GENERATE_KWARGS
    )
    response_baseline = f'Therapist:{response_baseline}'
    # Response conditioned on the therapy task description and the persona label
    response = chatbot.generate(
        context,
        prompt=prompt,
        task_description=task,
        global_labels=global_label,
        **GENERATE_KWARGS
    )
    response = f'Therapist:{response}'
    # Add original and generated responses to output data
    # (conversation ID and turn index are not tracked, hence None)
    out_data.extend(
        ('test', 'HOPE', None, None, 'Therapist', context, message, reply, model_name)
        for reply, model_name in (
            (original_response, 'Ground truth'),
            (response_baseline, 'DLDLM'),
            (response, 'Therapy-DLDLM'),
        )
    )

In [None]:
# Turn the collected row tuples into a data frame with named columns
out_df = pd.DataFrame(data=out_data, columns=OUT_DF_COLUMNS)

### Serialise data

In [None]:
# Make sure the output directory exists: to_csv does not create missing
# directories and would fail on a fresh checkout
os.makedirs(DATA_PATH, exist_ok=True)
# Write the samples without the data frame index
out_df.to_csv(os.path.join(DATA_PATH, 'empathy_assessment_samples.csv'), index=False)