# Evaluation

## Preliminaries

### Imports

In [1]:
import sys
import os
# Extend the module search path, presumably so that the project package
# (`programmable_chatbot`) imported below can be found.
# NOTE(review): './src' is resolved against the current working directory, while every
# other path in this notebook starts with '../' — confirm this is the intended location.
sys.path.append('./src')

In [2]:
import pandas as pd

In [3]:
from programmable_chatbot.data.corpora import HOPE
from programmable_chatbot.chatbot_api import Chatbot

In [4]:
import numpy as np

In [5]:
import torch

In [6]:
import random

### Constants

In [7]:
# Directory in which the CSV with the generated evaluation samples is written
DATA_PATH = '../resources/data/cache'

In [8]:
# Tokenizer identifier and model checkpoint location, both handed to the `Chatbot` constructor
TOKENIZER = 'gpt2'
MODEL_PATH = '../experiments/PROGCHAT/gpt2_large_2023_01_06_12_19_33/model/best_checkpoint'

In [9]:
# Column layout of the output table: one row per (sample, response source) pair
OUT_DF_COLUMNS = [
    'Split',
    'Corpus',
    'Conversation ID',
    'Turn IDX',
    'Speaker',
    'Context',
    'Last message',
    'Response',
    'Model',
]

In [10]:
# Decoding options passed unchanged to every `chatbot.generate` call in this notebook
GENERATE_KWARGS = dict(
    top_p=0.9,
    top_k=0,
    temperature=1.0,
    do_sample=True,
    max_new_tokens=128,
)
# Alternative configuration kept for reference:
# 'top_p': 1.0, 'top_k': 0, 'temperature': 0.95, 'do_sample': True, 'max_new_tokens': 128

In [11]:
# Number of (dialogue, therapist turn) pairs drawn for the evaluation
N_SAMPLES = 100

In [12]:
# Seed applied to the `random`, NumPy and PyTorch generators
RANDOM_SEED = 2307

### Model and tokenizer

In [13]:
# Single chatbot instance shared by every generation call in this notebook
chatbot = Chatbot(MODEL_PATH, TOKENIZER)

### Random seed

In [14]:
# Seed the Python, NumPy and PyTorch random number generators with the same value
random.seed(RANDOM_SEED)
np.random.seed(RANDOM_SEED)
# Last expression of the cell: its return value (a torch Generator) is what the cell displays
torch.manual_seed(RANDOM_SEED)

<torch._C.Generator at 0x7f0601b14810>

## Data

### Load data

In [15]:
# Build the HOPE corpus wrapper for the 'test' split, tokenised with the chatbot's tokenizer.
# NOTE(review): `augmentation` and `dropout` are enabled on the test split — confirm this is
# intended for evaluation data.
hope = HOPE(
    '../resources/data/raw/HOPE_WSDM_2022',
    'test',
    chatbot.tokenizer,
    augmentation=True,
    dropout=True,
    max_chunk_turns=8,
    max_context_turns=3,
    min_turns=3,
    # Reuse the notebook-wide seed instead of repeating the literal, so the two cannot drift apart
    random_seed=RANDOM_SEED
)
# Evaluation view of the corpus; later cells read data['generator']['conditioned'] from it
data = hope.get_data_for_evaluation()

### Randomly sample conversations

In [16]:
# For every dialogue, collect once the positions of the turns whose prompt starts with 'T'
# (the therapist turns) and keep only the dialogues that have at least one such turn.
# Sampling from a dialogue without therapist turns would otherwise fail with an opaque
# IndexError raised inside random.choice.
candidates = []
for dialogue in data['generator']['conditioned']:
    therapist_turns = [
        idx for idx, utterance in enumerate(dialogue['utterances']) if utterance[0].startswith('T')
    ]
    if therapist_turns:
        candidates.append((dialogue, therapist_turns))
if N_SAMPLES and not candidates:
    raise ValueError('No dialogue containing a therapist turn is available for sampling')

# Draw N_SAMPLES (dialogue, therapist turn) pairs. Sampling is done with replacement, so the
# same pair may be drawn more than once.
eval_data = []
for _ in range(N_SAMPLES):
    dialogue, therapist_turns = random.choice(candidates)
    t = random.choice(therapist_turns)
    # Keep the dialogue up to and including the selected therapist turn (the target response)
    eval_data.append(
        {'task_description': dialogue['task_description'], 'utterances': dialogue['utterances'][:t + 1]}
    )

## Task description

In [17]:
# Task description used to prompt the chatbot as an empathetic therapy assistant.
# NOTE(review): the wording (e.g. "his/hers") is part of the prompt fed to the model and is
# deliberately reproduced verbatim.
task = (
    'The following is a therapy session between an empathetic therapist AI, called TherapyBot, '
    'and a person, called Patient.\n\n'
    'In the following interactions, TherapyBot and Patient will converse in natural language. '
    'The Patient talks about his/hers issues to TherapyBot and '
    'TherapyBot helps the Patient to explore and solve his/hers problems. '
    'TherapyBot reacts empathetically giving informative and supportive responses.\n'
    'The conversation is grounded in the persona description of the speakers. \n'
    'A persona description is a short description in a few sentences '
    'of the personal information of one or both speakers.'
)
# Global label passed together with the task description: the persona of TherapyBot
global_label = (
    'Persona description of the speakers: \n'
    'TherapyBot persona: My name is TherapyBot and I am a therapist AI. '
    'I use empathy to connect with my patients. '
    'I want to help my patients feel better. I like offering support to people.'
)

## Evaluation data preparation

### Response generation

In [18]:
out_data = []

# Iterate over the sampled dialogues. For each one, three rows are produced: the original
# therapist response and two responses generated by the same `chatbot` instance under two
# different prompting set-ups.
for sample in eval_data:
    utterances = sample['utterances']
    # Everything before the last turn is the dialogue history; the last turn is the target
    history, (therapist_prompt, text) = utterances[:-1], utterances[-1]
    # Last message before the target turn (empty when the target opens the dialogue)
    message = f'Patient: {history[-1][1].strip()}' if history else ''
    # Context as found in the corpus (prompt followed by the utterance text)
    context = [speaker_prompt + utterance for speaker_prompt, utterance in history]
    # Original response from the corpus
    # (no space is inserted after the colon: `text` presumably carries its own leading space — verify)
    original_response = f'Therapist:{text}'

    # 1) Response generated with the task description that comes with the sample
    response_baseline = chatbot.generate(
        context,
        prompt=therapist_prompt,
        task_description=sample['task_description'],
        **GENERATE_KWARGS
    )
    response_baseline = f'Therapist: {response_baseline}'

    # 2) Response generated with the TherapyBot task description and persona: the speaker
    #    name 'Therapist' is replaced by 'TherapyBot' in both the context and the target prompt
    context_ = [
        speaker_prompt.replace('Therapist', 'TherapyBot') + utterance for speaker_prompt, utterance in history
    ]
    therapybot_prompt = therapist_prompt.replace('Therapist', 'TherapyBot')
    response = chatbot.generate(
        context_,
        prompt=therapybot_prompt,
        task_description=task,
        global_labels=global_label,
        **GENERATE_KWARGS
    )
    response = f'Therapist: {response}'

    # Add original and generated responses to output data; the three rows share the same
    # leading fields and differ only in the response and in the label of its source
    row_prefix = ('test', 'HOPE', None, None, 'Therapist', context, message)
    out_data.extend(
        row_prefix + (reply, source)
        for reply, source in (
            (original_response, 'Ground truth'),
            (response_baseline, 'Therapist simulation'),
            (response, 'Prompted empathy therapy AI'),
        )
    )

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generati

In [19]:
# Assemble the collected rows into a table with the predefined column layout and display it
out_df = pd.DataFrame(data=out_data, columns=OUT_DF_COLUMNS)
out_df

Unnamed: 0,Split,Corpus,Conversation ID,Turn IDX,Speaker,Context,Last message,Response,Model
0,test,HOPE,,,Therapist,[Therapist [dialogue act category: speaker res...,Patient: That way I can kind of relive a story...,"Therapist: right. So the idea is, this is how ...",Ground truth
1,test,HOPE,,,Therapist,[Therapist [dialogue act category: speaker res...,Patient: That way I can kind of relive a story...,"Therapist: so the scenario is, you have that e...",Therapist simulation
2,test,HOPE,,,Therapist,[Therapist [dialogue act category: speaker res...,Patient: That way I can kind of relive a story...,Therapist: just a little bit about maybe kind ...,Prompted empathy therapy AI
3,test,HOPE,,,Therapist,"[Therapist [dialogue act category: general, di...","Patient: No ifs, ands or buts.",Therapist: GEORGE You're right handed or left ...,Ground truth
4,test,HOPE,,,Therapist,"[Therapist [dialogue act category: general, di...","Patient: No ifs, ands or buts.","Therapist: All right, Now what do you say to t...",Therapist simulation
...,...,...,...,...,...,...,...,...,...
295,test,HOPE,,,Therapist,[Therapist [dialogue act category: speaker ini...,Patient: You're right. I don't want I don't wa...,"Therapist: so we need to change your thought, ...",Therapist simulation
296,test,HOPE,,,Therapist,[Therapist [dialogue act category: speaker ini...,Patient: You're right. I don't want I don't wa...,Therapist: I don't think that's a good idea. I...,Prompted empathy therapy AI
297,test,HOPE,,,Therapist,[Therapist [dialogue act category: speaker res...,Patient: Yeah.,Therapist: What's something that you could sen...,Ground truth
298,test,HOPE,,,Therapist,[Therapist [dialogue act category: speaker res...,Patient: Yeah.,"Therapist: So in the moment, in the presentati...",Therapist simulation


### Serialise data

In [20]:
# Create the output directory if needed, so that the results of the (expensive) generation
# step are not lost because of a missing folder
os.makedirs(DATA_PATH, exist_ok=True)
# Write the samples without the index column.
# Note: the 'Context' column holds Python lists, which end up in the CSV as their string form.
out_df.to_csv(os.path.join(DATA_PATH, 'ppm_dlm_empathy_assessment_samples.csv'), index=False)