## Notebook that queries the model APIs and collects their responses

In [1]:
import pandas as pd
import requests
from typing import Dict
import time
import os
import json
import re

### Reading the data

In [2]:
# Load the cleaned question dataset. Later cells read its `year`, `index_question`,
# `subject`, `ground_truth`, `text_concat_english` and `text_concat_portuguese` columns.
data = pd.read_csv("../02-dataset/data/cleaned/all_questions_df_english.csv")
# Bare last expression: rendered as the cell output.
data

Unnamed: 0.1,Unnamed: 0,description,alternatives,year,subject,ground_truth,index_question,text_concat_english,text_concat_portuguese
0,0,Questão 4) A Minor Bird I have wished a bird w...,"['A) culpa por não poder cuidar do pássaro.', ...",2020,"Linguagens, códigos e suas tecnologias",D,4,Question 4) Minor Bird I Have Wished a Bird Wo...,Questão 4) A Minor Bird I have wished a bird w...
1,1,"Questão 5) Finally, Aisha finished with her cu...","['A) reforçam um padrão de beleza.', 'B) retra...",2020,"Linguagens, códigos e suas tecnologias",C,5,"Question 5) Finally, Aisha Finished with Her C...","Questão 5) Finally, Aisha finished with her cu..."
2,2,Questão 01) Pablo Pueblo Regresa un hombre en ...,['A) contrapor a individualidade de um sujeito...,2020,"Linguagens, códigos e suas tecnologias",A,6,Question 01) Pablo Pueblo returns a Hombre en ...,Questão 01) Pablo Pueblo Regresa un hombre en ...
3,3,"Questão 4) Oye, Pito, ésta es: la vida bruta d...",['A) rejeição da língua utilizada por seus ant...,2020,"Linguagens, códigos e suas tecnologias",E,9,"Question 4) Oye, Pito, Estas Es: La Life of Un...","Questão 4) Oye, Pito, ésta es: la vida bruta d..."
4,4,Questão 5) Poco después apareció en casa de El...,['A) determinação para conduzir discussões pes...,2020,"Linguagens, códigos e suas tecnologias",E,10,Question 5) Poco Después Apareció en House of ...,Questão 5) Poco después apareció en casa de El...
...,...,...,...,...,...,...,...,...,...
1001,1001,QuESTÃO 167) Em um jogo disputado em uma mesa ...,"['A) Arthur, pois a soma que escolheu é a meno...",2011,Matemática e suas tecnologias,C,172,Question 167) In a game played at a pool table...,QuESTÃO 167) Em um jogo disputado em uma mesa ...
1002,1002,QuESTÃO 168) É possível usar água ou comida pa...,"['A) 20 mL.', 'B) 24 mL.', 'C) 100 mL.', 'D) 1...",2011,Matemática e suas tecnologias,C,173,Question 168) It is possible to use water or f...,QuESTÃO 168) É possível usar água ou comida pa...
1003,1003,"QuESTÃO 171) Nos últimos cinco anos, 32 mil mu...","['A) 4 mil.', 'B) 9 mil.', 'C) 21 mil.', 'D) 3...",2011,Matemática e suas tecnologias,D,176,"Question 171) In the last five years, 32,000 w...","QuESTÃO 171) Nos últimos cinco anos, 32 mil mu..."
1004,1004,QuESTÃO 174) O setor de recursos humanos de um...,"['A) 24.', 'B) 31.', 'C) 32.', 'D) 88.', 'E) 8...",2011,Matemática e suas tecnologias,E,179,Question 174) The human resources sector of a ...,QuESTÃO 174) O setor de recursos humanos de um...


## Making the requests

In [12]:
def get_text(line : pd.Series, text_add_prompt: str, lang: str):
    """
    Build the prompt text for one question.

    Parameters
    ----------
    line : row holding the `text_concat_portuguese` and
        `text_concat_english` entries.
    text_add_prompt : extra instruction appended on a new line; an empty
        string means nothing is appended.
    lang : 'pt' selects the Portuguese text, any other value the English one.

    Returns
    -------
    The question text, followed by "\\n" + `text_add_prompt` when the
    latter is not empty.
    """
    column = 'text_concat_portuguese' if lang == 'pt' else 'text_concat_english'
    question = line[column]

    if text_add_prompt == "":
        return question
    return question + "\n" + text_add_prompt

def make_querie(url : str, text : str, temperature: float = None, stop_tokens: str = None):
    """
    Send one GET request to a model endpoint and return its answer.

    Parameters
    ----------
    url : full URL of the model endpoint.
    text : prompt, sent as the `query` parameter.
    temperature : sent as the `temperature` parameter when not None.
    stop_tokens : sent as the `stop_tokens` parameter when not None.

    Returns
    -------
    dict with two keys:
        'time_to_run' - seconds spent in the HTTP request,
        'result'      - the value stored under 'result' in the response body.

    Raises
    ------
    ValueError / SyntaxError when the body is neither JSON nor a plain
    Python literal; KeyError when the decoded body has no 'result' key.
    """
    # Local import: only needed for the literal fallback below.
    import ast

    params = {'query': text}
    if temperature is not None:
        params['temperature'] = temperature
    if stop_tokens is not None:
        params['stop_tokens'] = stop_tokens

    start_time = time.time()
    request = requests.get(url, params = params)
    elapsed = time.time() - start_time

    # The body used to be passed to eval(), which executes whatever the server
    # sends and breaks on JSON literals (true/false/null). Decode it as data.
    try:
        payload = json.loads(request.text)
    except ValueError:
        # Fallback for servers that answer with a Python-style dict repr
        # (single quotes); literal_eval only accepts literals, never calls.
        payload = ast.literal_eval(request.text)

    return {
        'time_to_run': elapsed,
        'result': payload['result']
    }

def make_all_requests(
        df: pd.DataFrame,
        models: Dict,
        temperature: float,
        stop_tokens: str,
        path_to_save: str,
        text_add_prompt: str,
        lang: str,
        base_path: str = "http://localhost:8000",
        verbose: bool = True
    ):
    """
    Query every model for every question and store each answer as JSON.

    One file named `<year>-<index_question>-<model>.json` is written per
    (question, model) pair inside `path_to_save`. Pairs whose file already
    exists are skipped, so an interrupted run can be resumed by calling the
    function again.

    Parameters
    ----------
    df : questions; needs the `year` and `index_question` columns plus the
        text column read by `get_text`.
    models : mapping of model name -> route appended to `base_path`.
    temperature, stop_tokens : forwarded to `make_querie`.
    path_to_save : output directory, created (with parents) when missing.
    text_add_prompt, lang : forwarded to `get_text`.
    base_path : scheme/host/port prefix of the API.
    verbose : when True, print every result as it is saved.
    """
    # os.mkdir inside a bare `except: pass` hid real failures (permissions,
    # missing parent directory) until the first open() call; makedirs also
    # creates intermediate directories and tolerates an existing one.
    os.makedirs(path_to_save, exist_ok=True)

    for model, url in models.items():
        # The endpoint does not depend on the question.
        url_req = base_path + url

        for index, line in df.iterrows():
            name_arq = f"{line['year']}-{line['index_question']}-{model}.json"
            full_path_arq = f"{path_to_save}/{name_arq}"

            # Already processed on a previous run: do not query again.
            if os.path.exists(full_path_arq):
                continue

            result = make_querie(
                url = url_req,
                text = get_text(line, text_add_prompt, lang),
                temperature = temperature,
                stop_tokens = stop_tokens
            )

            # NOTE(review): the file uses the platform default encoding while
            # ensure_ascii=False may emit non-ASCII text; the reader must use
            # the same encoding.
            with open(full_path_arq, 'w') as file:
                json.dump(result, file, indent=2, ensure_ascii=False)

            if verbose:
                print(f"--> {model} || {index} || {result}")

        print(f"Run the model {model}")
    

In [13]:
# Mapping of model name -> API route (the route is appended to the base URL
# when querying). The model name also becomes part of the saved file names
# and of the `<name>_response` / `<name>_prediction` column names.
# Commented-out entries are skipped by every cell that iterates `models`.
models = {
#     "llama_7b":"/models/llama/7b",
#     "llama_13b":"/models/llama/13b",
#     "alpaca_7b": "/models/alpaca/7b",
#     "alpaca_13b": "/models/alpaca/13b",
#     "koala_7b": "/models/koala/7b",
#     "koala_13b": "/models/koala/13b",
#     "vicuna_7b": "/models/vicuna/7b",
    "vicuna_13b": "/models/vicuna/13b"
}

### First experiment: concatenate the question text, send it to the model, and collect the response
- PT-BR
- The models do not seem to be capable of explaining their answers

In [16]:
# Experiment configuration.
PATH_SAVE_DATA = "data/03-explain-response-pt"  # one JSON file per (question, model) is written here
LANG = 'pt'  # 'pt' selects the Portuguese text column; any other value selects the English one
TEXT_ADD_PROMPT = "Responda a pergunta e explique"  # appended to each question on a new line
BASE_PATH = "http://localhost:8000"

# Responses that are already saved are skipped, so this cell can be re-run
# after an interruption without repeating finished queries.
make_all_requests(
    df = data,
    models = models,
    temperature = 0.1,
    stop_tokens = "pergunta:,</s>,\n",  # NOTE(review): looks like a comma-separated list - confirm how the API parses it
    text_add_prompt = TEXT_ADD_PROMPT,
    path_to_save = PATH_SAVE_DATA,
    lang = LANG,
    base_path = BASE_PATH
)

--> llama_7b || 33 || {'time_to_run': 10.783211469650269, 'result': ''}
--> llama_7b || 34 || {'time_to_run': 25.136221647262573, 'result': ' 58) A estratégia americana é, em sua essência, uma estratégia preventiva, que se baseia na ideia de que o mundo é um lugar perigoso e que os Estados Unidos devem estar preparados para enfrentar qualquer ameaça.'}
--> llama_7b || 35 || {'time_to_run': 10.865356206893921, 'result': ''}
--> llama_7b || 36 || {'time_to_run': 13.635091304779053, 'result': ''}
--> llama_7b || 37 || {'time_to_run': 16.21774387359619, 'result': ''}
--> llama_7b || 38 || {'time_to_run': 14.29270315170288, 'result': ''}
--> llama_7b || 39 || {'time_to_run': 17.15628409385681, 'result': ''}
--> llama_7b || 40 || {'time_to_run': 17.27981400489807, 'result': ''}
--> llama_7b || 41 || {'time_to_run': 31.21723961830139, 'result': ' 65) A partir da década de 1980, o Brasil passou por uma série de reformas que tiveram como objetivo principal reduzir os custos do Estado e aumentar

KeyboardInterrupt: 

### Reading the processed data

In [None]:
def get_response(
    path_jsons: str,
    dataframe: pd.DataFrame,
    models: Dict
):
    """
    Attach the saved model answers to the question table.

    For every model name in `models`, the file
    `<path_jsons>/<year>-<index_question>-<model>.json` of each row is parsed
    and the parsed objects are stored in a new `<model>_response` column.

    Parameters
    ----------
    path_jsons : directory holding the JSON files.
    dataframe : questions with the `year` and `index_question` columns; it is
        modified in place.
    models : mapping whose keys are the model names (values are not used).

    Returns
    -------
    The same `dataframe` object, with the new columns added.

    Raises
    ------
    Whatever `open` raises when a file is missing for some (row, model) pair.
    """
    def _load_answer(row, model_name):
        # Parse the stored answer of one question for one model.
        file_name = f"{row['year']}-{row['index_question']}-{model_name}.json"
        with open(f"{path_jsons}/{file_name}",'r') as handle:
            return json.loads(handle.read())

    for model_name in models:
        dataframe[f'{model_name}_response'] = [
            _load_answer(row, model_name)
            for _, row in dataframe.iterrows()
        ]

    return dataframe

# Attach the saved responses to a copy of `data`, so the original frame stays untouched.
# Every (question, model) JSON file must already exist in PATH_SAVE_DATA:
# get_response opens each one unconditionally.
data_results = get_response(
    path_jsons = PATH_SAVE_DATA,
    dataframe = data.copy(),
    models = models
)

In [None]:
# Show the questions together with the new `<model>_response` columns.
data_results

## Defining the heuristics to extract the chosen alternative from each response

In [None]:
def first_caracter(text: str):
    """
    Read the answer letter from the very beginning of a response.

    After stripping surrounding whitespace, the first character is accepted
    when it is one of A-E and is either the whole text or is followed by a
    non-alphanumeric character (so "A) ..." matches, "Apple" does not).

    Returns
    -------
    The letter, or None when the text does not start with an isolated A-E.
    """
    stripped = text.strip()
    if not stripped:
        return None

    head = stripped[0]
    if head not in 'ABCDE':
        return None

    # A lone letter, or a letter followed by punctuation/whitespace.
    if len(stripped) == 1 or not stripped[1].isalnum():
        return head
    return None

def identify_alternative_mid_text(text: str):
    """
    Find a single isolated answer letter somewhere inside a response.

    A candidate is a letter A-E that is preceded by a space and is not
    followed by another word character, e.g. " B)", " C " or a trailing " D".
    The letter is returned only when the text holds exactly one candidate;
    zero or several candidates are treated as ambiguous.

    Differences from the previous implementation:
    * it ran two overlapping patterns and concatenated their matches, so an
      occurrence such as " B) " was counted twice and always rejected;
    * its second pattern also matched the first letter of any capitalised
      word (" Down" -> "D"); the non-word lookahead mirrors the
      non-alphanumeric check done by `first_caracter`;
    * patterns are now raw strings, avoiding the invalid `\\)` escape in a
      normal string literal.

    Returns
    -------
    The letter, or None when there is not exactly one candidate.
    """
    # NOTE(review): one-letter words such as the Portuguese "A" / "E" still
    # count as candidates; the exactly-one rule is the only protection.
    candidates = re.findall(r" ([A-E])(?!\w)", text)

    if len(candidates) == 1:
        return candidates[0]
    return None


def run_identify_alternative_result(
    dataframe: pd.DataFrame
):
    """
    Turn every raw model response into a predicted alternative.

    For each column ending in `_response`, the 'result' text of every row is
    passed through the heuristics in order (`first_caracter`, then
    `identify_alternative_mid_text`); the first non-None output is kept and
    '-' marks rows where no heuristic found a letter. The outcome is stored
    in the matching `_prediction` column.

    Returns
    -------
    The same `dataframe` object, modified in place.
    """
    heuristics = (first_caracter, identify_alternative_mid_text)

    def _extract(answer_text):
        # First heuristic that recognises a letter wins; '-' means "not found".
        for heuristic in heuristics:
            found = heuristic(answer_text)
            if found is not None:
                return found
        return '-'

    response_columns = [c for c in dataframe.columns if c.endswith("_response")]

    for response_column in response_columns:
        prediction_column = response_column.replace("_response","_prediction")
        dataframe[prediction_column] = [
            _extract(row[response_column]['result'])
            for _, row in dataframe.iterrows()
        ]

    return dataframe
    
    

In [None]:
# NOTE: run_identify_alternative_result adds the `_prediction` columns to
# `data_results` in place and returns that same object, so `df_predictions`
# and `data_results` refer to one DataFrame.
df_predictions = run_identify_alternative_result(data_results)
df_predictions

### What fraction of all predictions had an answer label detected

In [None]:
def compute_percentage_predictions(
    dataframe: pd.DataFrame
):
    """
    Share of predictions for which an answer letter was detected.

    All columns ending in `_prediction` are pooled together; a cell equal to
    '-' counts as "not detected".

    Returns
    -------
    1 - (number of '-' cells / total number of prediction cells).
    """
    prediction_columns = [c for c in dataframe.columns if c.endswith("_prediction")]

    undetected = dataframe[prediction_columns].eq('-')
    n_undetected = undetected.sum().sum()
    n_cells = undetected.size

    return 1 - (n_undetected / n_cells)

# Fraction (0-1) of prediction cells, pooled over all models, that are not '-'.
c = compute_percentage_predictions(df_predictions)
print(f"Coverage predictions: {c}")

In [None]:
def verify_results_text(
    dataframe: pd.DataFrame
):
    """
    Print the raw responses whose answer letter could not be detected.

    For every `_prediction` column, each row marked with '-' has the 'result'
    text of the matching `_response` column printed as `--> <text>`, in row
    order. Nothing is returned.
    """
    prediction_columns = [c for c in dataframe.columns if c.endswith("_prediction")]

    for prediction_column in prediction_columns:
        response_column = prediction_column.replace("_prediction","_response")

        for _, row in dataframe.iterrows():
            # Only rows where no heuristic found a letter are of interest.
            if not (row[prediction_column] == '-'):
                continue
            text = row[response_column]['result']
            print(f"--> {text}")
# Print the raw responses for which no heuristic found a letter (prediction == '-').
verify_results_text(df_predictions)

## Computing the accuracy of the models

In [None]:
def accuracy(ground_truth, prediction):
    """
    Fraction of positions where `prediction` equals `ground_truth`.

    Both arguments are compared element-wise with `==`; the number of equal
    positions is divided by the number of compared positions.
    """
    is_correct = (ground_truth == prediction)
    n_correct = is_correct.sum()
    n_total = len(is_correct)

    return n_correct / n_total

def compute_acc_by_year(
    df_predictions: pd.DataFrame
):
    """
    Accuracy of every model, broken down by exam year.

    Parameters
    ----------
    df_predictions : frame with the `year` and `ground_truth` columns and one
        `<model>_prediction` column per model.

    Returns
    -------
    DataFrame with one row per model: a `model` column followed by one column
    per year (ascending) holding `accuracy` on that year's questions.
    """
    years = sorted(set(df_predictions['year']))
    prediction_columns = [c for c in df_predictions.columns if c.endswith('_prediction')]

    # Slice the questions of each year once; the slices are shared by all models.
    rows_by_year = {
        year: df_predictions[df_predictions['year'] == year]
        for year in years
    }

    rows = [
        [column.replace("_prediction","")] + [
            accuracy(rows_by_year[year]['ground_truth'], rows_by_year[year][column])
            for year in years
        ]
        for column in prediction_columns
    ]

    return pd.DataFrame(rows,columns=['model']+years)
    
    

In [None]:
# Accuracy of a single model over all questions. The column name is hard-coded,
# so this cell needs "vicuna_13b" to be an active key of `models`.
accuracy(df_predictions['ground_truth'],df_predictions['vicuna_13b_prediction'])

## Accuracy by year

In [None]:
# One row per model, one column per year.
compute_acc_by_year(df_predictions)

## Accuracy by knowledge area

In [None]:
def compute_acc_by_knowlege_area(
    df_predictions: pd.DataFrame
):
    """
    Accuracy of every model, broken down by knowledge area.

    Parameters
    ----------
    df_predictions : frame with the `subject` and `ground_truth` columns and
        one `<model>_prediction` column per model.

    Returns
    -------
    DataFrame with one row per model: a `model` column followed by one column
    per distinct subject holding `accuracy` on that subject's questions.
    """
    # Subjects came straight out of a set, so the column order changed from
    # one kernel run to the next. Sort them (as compute_acc_by_year does with
    # the years); key=str keeps the sort working if a non-string value such
    # as NaN is present.
    subjects = sorted(set(df_predictions['subject']), key=str)

    columns = [c for c in df_predictions.columns if c.endswith('_prediction')]

    list_output = []
    for column in columns:
        model = column.replace("_prediction","")
        aux = [model]
        for sub in subjects:
            df_aux = df_predictions[df_predictions['subject'] == sub]
            aux.append(accuracy(df_aux['ground_truth'],df_aux[column]))
        list_output.append(aux)

    return pd.DataFrame(list_output,columns=['model']+subjects)

In [None]:
# One row per model, one column per subject.
compute_acc_by_knowlege_area(df_predictions)

## Overall accuracy

In [None]:
def compute_acc_overall(
    df_predictions: pd.DataFrame
):
    """
    Accuracy of every model over all questions.

    Parameters
    ----------
    df_predictions : frame with the `ground_truth` column and one
        `<model>_prediction` column per model. No other column is needed
        (the unused `subject` lookup was removed, so a frame without that
        column no longer raises KeyError).

    Returns
    -------
    Single-row DataFrame with one column per model, named after the model.
    """
    columns = [c for c in df_predictions.columns if c.endswith('_prediction')]

    list_output = [
        accuracy(df_predictions['ground_truth'],df_predictions[column])
        for column in columns
    ]

    return pd.DataFrame([list_output],columns=[c.replace("_prediction","") for c in columns ])
    
    

In [None]:
# Single row with the overall accuracy of each model.
compute_acc_overall(df_predictions)