In [24]:
import json
import os
from typing import List, Optional, Dict, Any, Tuple
import torch as t
from transformers import AutoTokenizer, AutoModelForCausalLM
from conversation_lib import Conversation
from tqdm import tqdm
from gpt_lib import rate_answer


In [17]:
def load_data(datafile: str) -> Any:
    """Read a JSON file and return its decoded contents.

    Args:
        datafile: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` decodes from the file. The notebook uses this
        both for a top-level object (indexed by ``"answers_list_list"``) and
        for the task collection, so the result is not always a list.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # JSON text is exchanged as UTF-8; do not depend on the platform's
    # locale-specific default encoding when decoding it.
    with open(datafile, 'r', encoding='utf-8') as f:
        return json.load(f)
def save_data(data: Any, datafile: str) -> None:
    """Write ``data`` to ``datafile`` as JSON indented by four spaces.

    Args:
        data: Any JSON-serializable value (the notebook passes the loaded
            top-level object).
        datafile: Destination path; an existing file is overwritten.

    Raises:
        TypeError: If ``data`` contains a value that ``json`` cannot encode.
            The destination file is left untouched in that case.
        OSError: If the file cannot be opened for writing.
    """
    # Encode before opening: mode 'w' truncates the target immediately, so
    # serializing first keeps an existing file intact when encoding fails.
    serialized = json.dumps(data, indent=4)
    with open(datafile, 'w', encoding='utf-8') as f:
        f.write(serialized)

In [18]:
# File holding the answers; the same path is passed to save_data further down,
# so the file is overwritten in place.
datafile = 'no_reminders.json'
data = load_data(datafile)
# `data` is indexed by key here, so the file is expected to hold a JSON object.
answers_list_list = data["answers_list_list"]
# Accumulator that the rating loop appends one rating_list to per answers_list.
rating_list_list = []
# Tasks are zipped positionally with each answers_list; the rating loop reads
# "message_start" and "message_end" from every task.
task_list = load_data('tasks.json')

In [20]:
# Rebuild the ratings from scratch inside this cell so that re-running it does
# not append a second copy of every run onto a list left over from an earlier
# execution.
rating_list_list = []
for answers_list in answers_list_list:
    rating_list = []
    # zip pairs answers with tasks by position and stops at the shorter input,
    # so answers_list and task_list must be aligned index-for-index.
    for answers, task in zip(answers_list, task_list):
        request = task["message_start"]
        question = task["message_end"]
        # One rating per candidate answer to this task, in answer order.
        ratings = [rate_answer(request, question, answer) for answer in answers]
        rating_list.append(ratings)
    rating_list_list.append(rating_list)

In [21]:
# Attach the collected ratings to the loaded object and write everything back
# to the file it was loaded from.
data["rating_list_list"] = rating_list_list
save_data(data, datafile)

In [31]:
def count_classification(rating_list: List[List[Tuple[str,str,int]]]) -> Dict[str, int]:
    """Count how often each classification occurs across all ratings.

    Only the first element of every rating is inspected. It may be either a
    label name or its numeric code; both spellings are folded into one label:
    ``"Correct"``/``"1"``, ``"Refusal"``/``"2"``, ``"Incorrect"``/``"3"``.

    Args:
        rating_list: Nested list of ratings; each rating is an indexable
            sequence whose element 0 is the classification string.

    Returns:
        A dict mapping each label that occurred to its count, with keys in
        first-seen order. Labels that never occur are absent.

    Raises:
        KeyError: If a rating's classification is not one of the six
            recognised spellings.
    """
    translation_dict = {"Correct": "Correct",
                        "1": "Correct",
                        "Refusal": "Refusal",
                        "2": "Refusal",
                        "Incorrect": "Incorrect",
                        "3": "Incorrect"}
    classification_dict: Dict[str, int] = {}
    for ratings in rating_list:
        for rating in ratings:
            # Normalise numeric codes to label names before counting.
            classification = translation_dict[rating[0]]
            classification_dict[classification] = classification_dict.get(classification, 0) + 1
    return classification_dict
# Tally the labels separately for the first two entries of rating_list_list
# (the rating loop produces one entry per answers_list in answers_list_list).
classification_dict_1 = count_classification(rating_list_list[0])
classification_dict_2 = count_classification(rating_list_list[1])

In [35]:
# Display the label counts computed from rating_list_list[0].
classification_dict_1

{'Incorrect': 38, 'Correct': 78, 'Refusal': 58}

In [36]:
# Display the label counts computed from rating_list_list[1].
classification_dict_2

{'Incorrect': 59, 'Refusal': 113, 'Correct': 2}

In [12]:
# Drop the first element of every answers_list.
# NOTE(review): answers_list_list is rebound from itself, so every execution
# of this cell removes one more leading element. Run it exactly once, and
# confirm why the first entry is being discarded.
answers_list_list = [answers_list[1:] for answers_list in answers_list_list]

In [15]:
# Store the current answers_list_list back under the key it was loaded from.
data["answers_list_list"] = answers_list_list

# save_data opens datafile in 'w' mode, so the file on disk is overwritten.
save_data(data, datafile)