## Notebook that queries the model APIs and collects their responses

In [1]:
import pandas as pd
import requests
from typing import Dict
import time
import os
import json
import re

### Reading the data

In [2]:
# Load the cleaned question dataset from a relative path; the frame holds both the
# Portuguese and the English concatenated question texts used to build the prompts.
data = pd.read_csv("../02-dataset/data/cleaned/all_questions_df_english.csv")
data

Unnamed: 0.1,Unnamed: 0,description,alternatives,year,subject,ground_truth,index_question,text_concat_english,text_concat_portuguese
0,0,Questão 4) A Minor Bird I have wished a bird w...,"['A) culpa por não poder cuidar do pássaro.', ...",2020,"Linguagens, códigos e suas tecnologias",D,4,Question 4) Minor Bird I Have Wished a Bird Wo...,Questão 4) A Minor Bird I have wished a bird w...
1,1,"Questão 5) Finally, Aisha finished with her cu...","['A) reforçam um padrão de beleza.', 'B) retra...",2020,"Linguagens, códigos e suas tecnologias",C,5,"Question 5) Finally, Aisha Finished with Her C...","Questão 5) Finally, Aisha finished with her cu..."
2,2,Questão 01) Pablo Pueblo Regresa un hombre en ...,['A) contrapor a individualidade de um sujeito...,2020,"Linguagens, códigos e suas tecnologias",A,6,Question 01) Pablo Pueblo returns a Hombre en ...,Questão 01) Pablo Pueblo Regresa un hombre en ...
3,3,"Questão 4) Oye, Pito, ésta es: la vida bruta d...",['A) rejeição da língua utilizada por seus ant...,2020,"Linguagens, códigos e suas tecnologias",E,9,"Question 4) Oye, Pito, Estas Es: La Life of Un...","Questão 4) Oye, Pito, ésta es: la vida bruta d..."
4,4,Questão 5) Poco después apareció en casa de El...,['A) determinação para conduzir discussões pes...,2020,"Linguagens, códigos e suas tecnologias",E,10,Question 5) Poco Después Apareció en House of ...,Questão 5) Poco después apareció en casa de El...
...,...,...,...,...,...,...,...,...,...
1001,1001,QuESTÃO 167) Em um jogo disputado em uma mesa ...,"['A) Arthur, pois a soma que escolheu é a meno...",2011,Matemática e suas tecnologias,C,172,Question 167) In a game played at a pool table...,QuESTÃO 167) Em um jogo disputado em uma mesa ...
1002,1002,QuESTÃO 168) É possível usar água ou comida pa...,"['A) 20 mL.', 'B) 24 mL.', 'C) 100 mL.', 'D) 1...",2011,Matemática e suas tecnologias,C,173,Question 168) It is possible to use water or f...,QuESTÃO 168) É possível usar água ou comida pa...
1003,1003,"QuESTÃO 171) Nos últimos cinco anos, 32 mil mu...","['A) 4 mil.', 'B) 9 mil.', 'C) 21 mil.', 'D) 3...",2011,Matemática e suas tecnologias,D,176,"Question 171) In the last five years, 32,000 w...","QuESTÃO 171) Nos últimos cinco anos, 32 mil mu..."
1004,1004,QuESTÃO 174) O setor de recursos humanos de um...,"['A) 24.', 'B) 31.', 'C) 32.', 'D) 88.', 'E) 8...",2011,Matemática e suas tecnologias,E,179,Question 174) The human resources sector of a ...,QuESTÃO 174) O setor de recursos humanos de um...


## Making the requests

In [3]:
def get_text(line : pd.Series, text_add_prompt: str, lang: str):
    """
    Build the prompt text for a single question row.

    Parameters
    ----------
    line : pd.Series
        Row holding the `text_concat_portuguese` and `text_concat_english` fields.
    text_add_prompt : str
        Extra text appended after a newline; an empty string appends nothing.
    lang : str
        'pt' selects the Portuguese text; any other value selects the English text.

    Returns
    -------
    The selected question text, optionally followed by a newline and `text_add_prompt`.
    """
    source_column = 'text_concat_portuguese' if lang == 'pt' else 'text_concat_english'
    question = line[source_column]

    # Nothing to append: hand back the question untouched
    if text_add_prompt == "":
        return question

    return question + ("\n" + text_add_prompt)

def make_querie(url : str, text : str, temperature: float = None, stop_tokens: str = None):
    """
    Send one prompt to a model endpoint and return its answer with the elapsed time.

    Parameters
    ----------
    url : str
        Full URL of the model endpoint.
    text : str
        Prompt, sent as the `query` request parameter.
    temperature : float, optional
        Sent as the `temperature` parameter only when it is not None.
    stop_tokens : str, optional
        Sent as the `stop_tokens` parameter only when it is not None.

    Returns
    -------
    dict
        `time_to_run`: seconds spent in the HTTP GET call.
        `result`: the value stored under the `result` key of the response body.

    Raises
    ------
    ValueError, SyntaxError
        If the response body is neither JSON nor a plain Python literal.
    KeyError
        If the parsed body has no `result` key.
    """
    import ast  # local import: only needed for the literal fallback below

    params = {'query': text}
    if temperature is not None:
        params['temperature'] = temperature
    if stop_tokens is not None:
        params['stop_tokens'] = stop_tokens

    start_time = time.time()
    request = requests.get(url, params = params)
    elapsed = time.time() - start_time

    # The body is remote data, so it must never be passed to eval(): that would
    # execute whatever expression the server (or anything in between) sends back.
    # Parse it as JSON; if the server answers with a Python-style dict repr
    # (single quotes), fall back to literal_eval, which only accepts literals.
    try:
        payload = json.loads(request.text)
    except ValueError:
        payload = ast.literal_eval(request.text)

    return {
        'time_to_run': elapsed,
        'result': payload['result']
    }

def make_all_requests(
        df: pd.DataFrame,
        models: Dict,
        temperature: float,
        stop_tokens: str,
        path_to_save: str,
        text_add_prompt: str,
        lang: str,
        base_path: str = "http://localhost:8000",
        verbose: bool = True
    ):
    """
    Query every model for every question and store each answer as a JSON file.

    One file named `<year>-<index_question>-<model>.json` is written per
    (question, model) pair inside `path_to_save`. Pairs whose file already
    exists are skipped, so an interrupted run can be resumed by calling the
    function again.

    Parameters
    ----------
    df : pd.DataFrame
        Questions; needs the `year` and `index_question` columns plus the text
        columns read by `get_text`.
    models : Dict
        Maps a model name to the endpoint path appended to `base_path`.
    temperature : float
        Forwarded to `make_querie`.
    stop_tokens : str
        Forwarded to `make_querie`.
    path_to_save : str
        Output directory; created (with missing parents) when absent.
    text_add_prompt : str
        Extra text appended to each question by `get_text`.
    lang : str
        Language selector passed to `get_text`.
    base_path : str
        Prefix of every endpoint URL.
    verbose : bool
        When True, print each result as it is stored.

    Raises
    ------
    OSError
        If the output directory cannot be created.
    """
    # makedirs creates missing parent folders too and tolerates an existing
    # directory; any other failure (permissions, path is a file, ...) is raised
    # here instead of being hidden until the first file write.
    os.makedirs(path_to_save, exist_ok=True)

    for model,url in models.items():
        # The endpoint is the same for every question of this model
        url_req = base_path + url

        for index,line in df.iterrows():
            name_arq = f"{line['year']}-{line['index_question']}-{model}.json"
            full_path_arq = f"{path_to_save}/{name_arq}"

            # Already processed in a previous run: do not query again
            if os.path.exists(full_path_arq):
                continue

            result = make_querie(
                url = url_req,
                text = get_text(line, text_add_prompt, lang),
                temperature = temperature,
                stop_tokens = stop_tokens
            )

            with open(full_path_arq, 'w') as file:
                file.write(json.dumps(result,indent=2,ensure_ascii=False))

            if verbose:
                print(f"--> {model} || {index} || {result}")

        print(f"Run the model {model}")
    

In [4]:
# Maps each model name (used in the output file names and in the result column
# names) to the endpoint path that is appended to the base URL when querying.
models = {
    "llama_7b":"/models/llama/7b",
    "llama_13b":"/models/llama/13b",
    "alpaca_7b": "/models/alpaca/7b",
    "alpaca_13b": "/models/alpaca/13b",
    "koala_7b": "/models/koala/7b",
    "koala_13b": "/models/koala/13b",
    "vicuna_7b": "/models/vicuna/7b",
    "vicuna_13b": "/models/vicuna/13b"
}

### First experiment: send only the concatenated question text to the model and collect the response
- EN

In [6]:
# Directory that receives one JSON file per (question, model) pair
PATH_SAVE_DATA = "data/02-only-question-en"
# Any value other than 'pt' makes get_text use the English question text
LANG = 'en'
# Empty string: nothing is appended after the question text
TEXT_ADD_PROMPT = ""
# Prefix joined with each model's endpoint path
BASE_PATH = "http://localhost:8000"

# Query every model for every question; pairs whose JSON file already exists in
# PATH_SAVE_DATA are skipped, so re-running this cell resumes an earlier run.
make_all_requests(
    df = data,
    models = models,
    temperature = 0.1,
    stop_tokens = "pergunta:,</s>,resposta:,resposta,\n,response,question",
    text_add_prompt = TEXT_ADD_PROMPT,
    path_to_save = PATH_SAVE_DATA,
    lang = LANG,
    base_path = BASE_PATH
)

Run the model llama_7b
Run the model llama_13b
Run the model alpaca_7b
Run the model alpaca_13b
Run the model koala_7b
Run the model koala_13b
Run the model vicuna_7b
Run the model vicuna_13b


# Reading the processed data

In [7]:
def get_response(
    path_jsons: str,
    dataframe: pd.DataFrame,
    models: Dict
):
    """
    Attach the stored model answers to the question frame.

    For every model name in `models`, the file
    `<path_jsons>/<year>-<index_question>-<model>.json` is read for each row and
    the parsed content is stored in a new `<model>_response` column.

    Parameters
    ----------
    path_jsons : str
        Directory holding the JSON files.
    dataframe : pd.DataFrame
        Questions with `year` and `index_question` columns; it is modified in
        place (columns are added) and also returned.
    models : Dict
        Only the keys (model names) are used.

    Returns
    -------
    pd.DataFrame
        The same `dataframe` object, with one extra column per model.
    """
    def _load_answer(row, model_name):
        # Parsed JSON content stored for this question/model pair
        file_name = f"{row['year']}-{row['index_question']}-{model_name}.json"
        with open(f"{path_jsons}/{file_name}",'r') as handle:
            return json.load(handle)

    for model_name in models:
        answers = [_load_answer(row, model_name) for _, row in dataframe.iterrows()]
        dataframe[f'{model_name}_response'] = answers

    return dataframe

# get_response adds columns to the frame it receives, so pass a copy to keep
# `data` free of the `<model>_response` columns.
data_results = get_response(
    path_jsons = PATH_SAVE_DATA,
    dataframe = data.copy(),
    models = models
)

In [8]:
data_results

Unnamed: 0.1,Unnamed: 0,description,alternatives,year,subject,ground_truth,index_question,text_concat_english,text_concat_portuguese,llama_7b_response,llama_13b_response,alpaca_7b_response,alpaca_13b_response,koala_7b_response,koala_13b_response,vicuna_7b_response,vicuna_13b_response
0,0,Questão 4) A Minor Bird I have wished a bird w...,"['A) culpa por não poder cuidar do pássaro.', ...",2020,"Linguagens, códigos e suas tecnologias",D,4,Question 4) Minor Bird I Have Wished a Bird Wo...,Questão 4) A Minor Bird I have wished a bird w...,"{'time_to_run': 30.66397261619568, 'result': '...","{'time_to_run': 27.973490715026855, 'result': ...","{'time_to_run': 13.883987665176392, 'result': ...","{'time_to_run': 39.211103439331055, 'result': ...","{'time_to_run': 17.665754795074463, 'result': ...","{'time_to_run': 39.60210204124451, 'result': '...","{'time_to_run': 31.646939277648926, 'result': ...","{'time_to_run': 53.53785514831543, 'result': '..."
1,1,"Questão 5) Finally, Aisha finished with her cu...","['A) reforçam um padrão de beleza.', 'B) retra...",2020,"Linguagens, códigos e suas tecnologias",C,5,"Question 5) Finally, Aisha Finished with Her C...","Questão 5) Finally, Aisha finished with her cu...","{'time_to_run': 82.46242713928223, 'result': '...","{'time_to_run': 50.158398151397705, 'result': ...","{'time_to_run': 27.824440240859985, 'result': ...","{'time_to_run': 60.4620680809021, 'result': ' ...","{'time_to_run': 32.00217628479004, 'result': '...","{'time_to_run': 60.574188470840454, 'result': ...","{'time_to_run': 63.04998183250427, 'result': '...","{'time_to_run': 93.10835218429565, 'result': '..."
2,2,Questão 01) Pablo Pueblo Regresa un hombre en ...,['A) contrapor a individualidade de um sujeito...,2020,"Linguagens, códigos e suas tecnologias",A,6,Question 01) Pablo Pueblo returns a Hombre en ...,Questão 01) Pablo Pueblo Regresa un hombre en ...,"{'time_to_run': 44.06672382354736, 'result': '...","{'time_to_run': 54.891977310180664, 'result': ...","{'time_to_run': 31.62975287437439, 'result': '...","{'time_to_run': 34.93287897109985, 'result': '...","{'time_to_run': 19.16502833366394, 'result': '...","{'time_to_run': 36.23348093032837, 'result': '...","{'time_to_run': 36.763997316360474, 'result': ...","{'time_to_run': 51.55441474914551, 'result': '..."
3,3,"Questão 4) Oye, Pito, ésta es: la vida bruta d...",['A) rejeição da língua utilizada por seus ant...,2020,"Linguagens, códigos e suas tecnologias",E,9,"Question 4) Oye, Pito, Estas Es: La Life of Un...","Questão 4) Oye, Pito, ésta es: la vida bruta d...","{'time_to_run': 38.82513642311096, 'result': '...","{'time_to_run': 26.148003578186035, 'result': ...","{'time_to_run': 15.944700717926025, 'result': ...","{'time_to_run': 34.05571150779724, 'result': '...","{'time_to_run': 18.168598890304565, 'result': ...","{'time_to_run': 34.41255474090576, 'result': '...","{'time_to_run': 33.808202505111694, 'result': ...","{'time_to_run': 50.03341579437256, 'result': '..."
4,4,Questão 5) Poco después apareció en casa de El...,['A) determinação para conduzir discussões pes...,2020,"Linguagens, códigos e suas tecnologias",E,10,Question 5) Poco Después Apareció en House of ...,Questão 5) Poco después apareció en casa de El...,"{'time_to_run': 99.42807507514954, 'result': '...","{'time_to_run': 38.656933307647705, 'result': ...","{'time_to_run': 27.935928344726562, 'result': ...","{'time_to_run': 44.073745012283325, 'result': ...","{'time_to_run': 23.80483651161194, 'result': '...","{'time_to_run': 45.07430052757263, 'result': '...","{'time_to_run': 45.98860740661621, 'result': '...","{'time_to_run': 67.6828727722168, 'result': ' ..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1001,1001,QuESTÃO 167) Em um jogo disputado em uma mesa ...,"['A) Arthur, pois a soma que escolheu é a meno...",2011,Matemática e suas tecnologias,C,172,Question 167) In a game played at a pool table...,QuESTÃO 167) Em um jogo disputado em uma mesa ...,"{'time_to_run': 18.197285652160645, 'result': ...","{'time_to_run': 45.8889217376709, 'result': ' ...","{'time_to_run': 26.179356575012207, 'result': ...","{'time_to_run': 47.77274799346924, 'result': '...","{'time_to_run': 21.043180465698242, 'result': ...","{'time_to_run': 89.5503556728363, 'result': ' ...","{'time_to_run': 26.232719898223877, 'result': ...","{'time_to_run': 57.654850006103516, 'result': ..."
1002,1002,QuESTÃO 168) É possível usar água ou comida pa...,"['A) 20 mL.', 'B) 24 mL.', 'C) 100 mL.', 'D) 1...",2011,Matemática e suas tecnologias,C,173,Question 168) It is possible to use water or f...,QuESTÃO 168) É possível usar água ou comida pa...,"{'time_to_run': 15.837353944778442, 'result': ...","{'time_to_run': 33.08618950843811, 'result': '...","{'time_to_run': 21.459909677505493, 'result': ...","{'time_to_run': 34.769564390182495, 'result': ...","{'time_to_run': 18.50494122505188, 'result': '...","{'time_to_run': 69.82431030273438, 'result': '...","{'time_to_run': 18.28632140159607, 'result': '...","{'time_to_run': 53.47907495498657, 'result': '..."
1003,1003,"QuESTÃO 171) Nos últimos cinco anos, 32 mil mu...","['A) 4 mil.', 'B) 9 mil.', 'C) 21 mil.', 'D) 3...",2011,Matemática e suas tecnologias,D,176,"Question 171) In the last five years, 32,000 w...","QuESTÃO 171) Nos últimos cinco anos, 32 mil mu...","{'time_to_run': 9.650543689727783, 'result': '...","{'time_to_run': 17.193283081054688, 'result': ...","{'time_to_run': 9.774365186691284, 'result': '...","{'time_to_run': 21.518895864486694, 'result': ...","{'time_to_run': 11.304681301116943, 'result': ...","{'time_to_run': 43.130900382995605, 'result': ...","{'time_to_run': 11.209919214248657, 'result': ...","{'time_to_run': 32.85961127281189, 'result': '..."
1004,1004,QuESTÃO 174) O setor de recursos humanos de um...,"['A) 24.', 'B) 31.', 'C) 32.', 'D) 88.', 'E) 8...",2011,Matemática e suas tecnologias,E,179,Question 174) The human resources sector of a ...,QuESTÃO 174) O setor de recursos humanos de um...,"{'time_to_run': 8.560076475143433, 'result': '...","{'time_to_run': 44.94996476173401, 'result': '...","{'time_to_run': 8.834339618682861, 'result': '...","{'time_to_run': 18.841432332992554, 'result': ...","{'time_to_run': 10.02408242225647, 'result': '...","{'time_to_run': 37.720765829086304, 'result': ...","{'time_to_run': 9.832234859466553, 'result': '...","{'time_to_run': 28.73652172088623, 'result': '..."


## Defining the heuristics to collect the response of the queries

In [28]:
def first_caracter(text: str):
    """
    Read the alternative from the very beginning of an answer.

    After stripping surrounding whitespace, the first character is returned
    when it is one of A-E and is either the whole text or is followed by a
    non-alphanumeric character (for example "C)" or "B."). Otherwise None.
    """
    answer = text.strip()

    # Empty answer, or it does not start with one of the five alternatives
    if not answer or answer[0] not in 'ABCDE':
        return None

    # A lone letter, or a letter that is not the start of a longer word/number
    if len(answer) == 1 or not answer[1].isalnum():
        return answer[0]

    return None

def identify_alternative_mid_text(text: str):
    """
    Look for a single alternative letter (A-E) preceded by a space in the text.

    Two patterns are searched and their matches are concatenated:
    a space, the letter, an optional space, an optional ")" and a space;
    and a space, the letter and an optional ")". The letter is returned only
    when the combined list holds exactly one match, otherwise None.

    NOTE(review): every text matched by the first pattern is also matched by
    the second one, so a hit on the first pattern always yields at least two
    entries and therefore None. In practice a letter is returned only when the
    second pattern matches exactly once and the first not at all. The second
    pattern also matches a capitalised word that starts with A-E. This is kept
    as-is because the reported results were produced with this behaviour.
    """
    # Raw strings: "\)" inside a normal string literal is an invalid escape
    # sequence, which recent Python versions warn about.
    spaced_pattern = r" [A-E] ?\)? "
    loose_pattern = r" [A-E]\)?"

    matches = re.findall(spaced_pattern, text) + re.findall(loose_pattern, text)

    if len(matches) == 1:
        # Both patterns start with one space followed by the letter itself
        return matches[0][1]
    return None

def identify_upper_letter(text: str):
    """
    Find a stand-alone alternative letter among the space-separated words.

    Closing parentheses are dropped and dots/commas become spaces; the text is
    then split on single spaces. The letter is returned when exactly one of the
    resulting words equals A, B, C, D or E, otherwise None.
    """
    # ')' is deleted, '.' and ',' are turned into word separators
    cleanup = str.maketrans({')': None, '.': ' ', ',': ' '})
    words = text.translate(cleanup).split(' ')

    found = [word for word in words if word in ('A', 'B', 'C', 'D', 'E')]

    return found[0] if len(found) == 1 else None

def run_identify_alternative_result(
    dataframe: pd.DataFrame
):
    """
    Turn every raw model answer into a predicted alternative.

    For each `<model>_response` column, the `result` text of every row is
    passed to the heuristics in order (`first_caracter`,
    `identify_alternative_mid_text`, `identify_upper_letter`); the first value
    that is not None becomes the prediction, and '-' is used when none of them
    finds a letter. Predictions are stored in a `<model>_prediction` column.

    The frame is modified in place (columns are added) and also returned.
    """
    extractors = (
        first_caracter,
        identify_alternative_mid_text,
        identify_upper_letter
    )

    def _predict(answer_text):
        # Lazily try each heuristic and stop at the first one that answers
        candidates = (extractor(answer_text) for extractor in extractors)
        return next((found for found in candidates if found is not None), '-')

    response_columns = [name for name in dataframe.columns if name.endswith("_response")]

    for response_column in response_columns:
        prediction_column = response_column.replace("_response","_prediction")
        dataframe[prediction_column] = [
            _predict(row[response_column]['result'])
            for _, row in dataframe.iterrows()
        ]

    return dataframe
    
    

In [29]:
# NOTE: run_identify_alternative_result adds the prediction columns to the frame it
# receives and returns that same frame, so `df_predictions` and `data_results`
# refer to one DataFrame.
df_predictions = run_identify_alternative_result(data_results)
df_predictions

Unnamed: 0.1,Unnamed: 0,description,alternatives,year,subject,ground_truth,index_question,text_concat_english,text_concat_portuguese,llama_7b_response,...,vicuna_7b_response,vicuna_13b_response,llama_7b_prediction,llama_13b_prediction,alpaca_7b_prediction,alpaca_13b_prediction,koala_7b_prediction,koala_13b_prediction,vicuna_7b_prediction,vicuna_13b_prediction
0,0,Questão 4) A Minor Bird I have wished a bird w...,"['A) culpa por não poder cuidar do pássaro.', ...",2020,"Linguagens, códigos e suas tecnologias",D,4,Question 4) Minor Bird I Have Wished a Bird Wo...,Questão 4) A Minor Bird I have wished a bird w...,"{'time_to_run': 30.66397261619568, 'result': '...",...,"{'time_to_run': 31.646939277648926, 'result': ...","{'time_to_run': 53.53785514831543, 'result': '...",E,A,D,A,A,C,C,B
1,1,"Questão 5) Finally, Aisha finished with her cu...","['A) reforçam um padrão de beleza.', 'B) retra...",2020,"Linguagens, códigos e suas tecnologias",C,5,"Question 5) Finally, Aisha Finished with Her C...","Questão 5) Finally, Aisha finished with her cu...","{'time_to_run': 82.46242713928223, 'result': '...",...,"{'time_to_run': 63.04998183250427, 'result': '...","{'time_to_run': 93.10835218429565, 'result': '...",C,B,B,B,C,C,C,B
2,2,Questão 01) Pablo Pueblo Regresa un hombre en ...,['A) contrapor a individualidade de um sujeito...,2020,"Linguagens, códigos e suas tecnologias",A,6,Question 01) Pablo Pueblo returns a Hombre en ...,Questão 01) Pablo Pueblo Regresa un hombre en ...,"{'time_to_run': 44.06672382354736, 'result': '...",...,"{'time_to_run': 36.763997316360474, 'result': ...","{'time_to_run': 51.55441474914551, 'result': '...",D,A,-,B,A,A,D,B
3,3,"Questão 4) Oye, Pito, ésta es: la vida bruta d...",['A) rejeição da língua utilizada por seus ant...,2020,"Linguagens, códigos e suas tecnologias",E,9,"Question 4) Oye, Pito, Estas Es: La Life of Un...","Questão 4) Oye, Pito, ésta es: la vida bruta d...","{'time_to_run': 38.82513642311096, 'result': '...",...,"{'time_to_run': 33.808202505111694, 'result': ...","{'time_to_run': 50.03341579437256, 'result': '...",B,C,A,A,A,A,E,D
4,4,Questão 5) Poco después apareció en casa de El...,['A) determinação para conduzir discussões pes...,2020,"Linguagens, códigos e suas tecnologias",E,10,Question 5) Poco Después Apareció en House of ...,Questão 5) Poco después apareció en casa de El...,"{'time_to_run': 99.42807507514954, 'result': '...",...,"{'time_to_run': 45.98860740661621, 'result': '...","{'time_to_run': 67.6828727722168, 'result': ' ...",E,A,-,D,A,A,D,D
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1001,1001,QuESTÃO 167) Em um jogo disputado em uma mesa ...,"['A) Arthur, pois a soma que escolheu é a meno...",2011,Matemática e suas tecnologias,C,172,Question 167) In a game played at a pool table...,QuESTÃO 167) Em um jogo disputado em uma mesa ...,"{'time_to_run': 18.197285652160645, 'result': ...",...,"{'time_to_run': 26.232719898223877, 'result': ...","{'time_to_run': 57.654850006103516, 'result': ...",E,-,E,D,A,A,D,-
1002,1002,QuESTÃO 168) É possível usar água ou comida pa...,"['A) 20 mL.', 'B) 24 mL.', 'C) 100 mL.', 'D) 1...",2011,Matemática e suas tecnologias,C,173,Question 168) It is possible to use water or f...,QuESTÃO 168) É possível usar água ou comida pa...,"{'time_to_run': 15.837353944778442, 'result': ...",...,"{'time_to_run': 18.28632140159607, 'result': '...","{'time_to_run': 53.47907495498657, 'result': '...",B,D,A,A,A,A,D,A
1003,1003,"QuESTÃO 171) Nos últimos cinco anos, 32 mil mu...","['A) 4 mil.', 'B) 9 mil.', 'C) 21 mil.', 'D) 3...",2011,Matemática e suas tecnologias,D,176,"Question 171) In the last five years, 32,000 w...","QuESTÃO 171) Nos últimos cinco anos, 32 mil mu...","{'time_to_run': 9.650543689727783, 'result': '...",...,"{'time_to_run': 11.209919214248657, 'result': ...","{'time_to_run': 32.85961127281189, 'result': '...",B,D,A,D,A,C,D,D
1004,1004,QuESTÃO 174) O setor de recursos humanos de um...,"['A) 24.', 'B) 31.', 'C) 32.', 'D) 88.', 'E) 8...",2011,Matemática e suas tecnologias,E,179,Question 174) The human resources sector of a ...,QuESTÃO 174) O setor de recursos humanos de um...,"{'time_to_run': 8.560076475143433, 'result': '...",...,"{'time_to_run': 9.832234859466553, 'result': '...","{'time_to_run': 28.73652172088623, 'result': '...",D,-,A,D,A,A,D,D


### What fraction of all predictions were resolved to a label

In [30]:
def compute_percentage_predictions(
    dataframe: pd.DataFrame
):
    """
    Share of predictions for which an alternative was identified.

    Looks at every `*_prediction` column, counts the cells equal to '-' (no
    alternative found) and returns one minus their fraction of all cells.
    """
    prediction_columns = [name for name in dataframe.columns if name.endswith("_prediction")]

    unresolved = dataframe[prediction_columns] == '-'
    n_rows, n_columns = unresolved.shape
    n_unresolved = unresolved.sum().sum()

    return 1 - (n_unresolved / (n_rows * n_columns))

# Fraction of (question, model) cells whose prediction is not the '-' placeholder
c = compute_percentage_predictions(df_predictions)
print(f"Coverage predictions: {c}")

Coverage predictions: 0.944955268389662


In [31]:
def verify_results_text(
    dataframe: pd.DataFrame
):
    """
    Print the raw answers that could not be mapped to an alternative.

    For each `<model>_prediction` column, every row whose prediction is '-'
    has the `result` text of the matching `<model>_response` cell printed,
    prefixed with "--> ". Nothing is returned.
    """
    prediction_columns = [name for name in dataframe.columns if name.endswith("_prediction")]

    for prediction_column in prediction_columns:
        response_column = prediction_column.replace("_prediction","_response")

        # Keep only the rows where no heuristic produced a letter
        unresolved_rows = dataframe[dataframe[prediction_column] == '-']

        for _, row in unresolved_rows.iterrows():
            text = row[response_column]['result']
            print(f"--> {text}")
# Print the raw response text behind every '-' prediction for manual inspection
verify_results_text(df_predictions)

--> 
-->  Answer 19) A, B, C, D, E
-->  134) Art. 2º The child is considered, for the purposes of this law, the person up to twelve years old, and adolescents between twelve and eighteen years old. [...] Art. 3º The child and adolescent enjoy all the fundamental rights inherent to the human person, without prejudice to the integral protection referred to in this law, assuring them, by law or other means, all opportunities and facilities, in order to They provide them with physical, mental, moral, spiritual and social development in conditions of freedom and dignity.
--> 
--> 
--> 
--> 
-->  01) A) 02) B) 03) C) 04) D) 05) E)
-->  12
-->  Answer 58) TEXT I In March 1889, when the first pilgrimages attracted by the miracles of the Blessed Maria de Araújo appeared, Juazeiro was inserted in the list of the foundation of the religious space. Another center was built, such as Aparecida do Norte, Canindé or Lourdes.
-->  QUESTION 96) La Realidad Cultural Y La Serving Credent Of Migrants Of La

--> 
--> 
-->  Erro (A) Allows the narrative to be objective and full of denotative meanings.
-->  Texto I e II apresentam posições que se opõem em relação à reforma agrária. Isso ocorre porque os autores associam a reforma agrária, respectivamente, com as seguintes ideias:
-->  Deliberative democracy is that parts of the political conflict must deliberate among themselves and, through reasonable argument, try to reach an agreement on policies that are satisfactory to all. Activist democracy suspiciously of exhortations to deliberation because it believes that in the real world of politics, where structural inequalities influence procedures and results, democratic processes that seem to fulfill deliberation norms often tend to benefit the most powerful agents. It therefore recommends that those who care about the promotion of more justice should mainly perform critical opposition activity, rather than trying to reach an agreement with those who support existing power structures or bene

## Computing the accuracy of the models

In [32]:
def accuracy(ground_truth, prediction):
    """
    Fraction of positions where the prediction equals the ground truth.

    Both arguments are compared element-wise with `==`; the number of matches
    is divided by the number of compared elements.
    """
    hits = (ground_truth == prediction)
    n_correct = hits.sum()
    n_total = len(hits)

    return n_correct / n_total

def compute_acc_by_year(
    df_predictions: pd.DataFrame
):
    """
    Accuracy of every model for each exam year.

    Returns a DataFrame with one row per `*_prediction` column: a `model`
    column (the column name without the `_prediction` suffix) followed by one
    column per distinct value of `year`, in ascending order, holding the
    accuracy against `ground_truth` on that year's rows.
    """
    years = sorted(set(df_predictions['year']))
    prediction_columns = [name for name in df_predictions.columns if name.endswith('_prediction')]

    # Slice the frame once per year and reuse the slices for every model
    rows_by_year = [df_predictions[df_predictions['year'] == year] for year in years]

    table = [
        [column.replace("_prediction","")]
        + [accuracy(subset['ground_truth'], subset[column]) for subset in rows_by_year]
        for column in prediction_columns
    ]

    return pd.DataFrame(table,columns=['model']+years)
    
    

In [33]:
accuracy(df_predictions['ground_truth'],df_predictions['vicuna_13b_prediction'])

0.3628230616302187

## Accuracy by year

In [34]:
compute_acc_by_year(df_predictions)

Unnamed: 0,model,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2022
0,llama_7b,0.182796,0.19,0.255319,0.283582,0.222222,0.247788,0.173913,0.381818,0.163636,0.272727,0.147541
1,llama_13b,0.225806,0.2,0.170213,0.298507,0.161616,0.168142,0.165217,0.090909,0.154545,0.222222,0.180328
2,alpaca_7b,0.086022,0.2,0.095745,0.149254,0.191919,0.185841,0.130435,0.109091,0.154545,0.111111,0.180328
3,alpaca_13b,0.397849,0.33,0.276596,0.358209,0.373737,0.318584,0.278261,0.327273,0.290909,0.333333,0.377049
4,koala_7b,0.172043,0.26,0.191489,0.134328,0.171717,0.168142,0.217391,0.090909,0.172727,0.20202,0.245902
5,koala_13b,0.354839,0.35,0.276596,0.343284,0.292929,0.318584,0.278261,0.309091,0.272727,0.393939,0.459016
6,vicuna_7b,0.344086,0.36,0.319149,0.402985,0.363636,0.362832,0.295652,0.418182,0.390909,0.444444,0.491803
7,vicuna_13b,0.397849,0.35,0.37234,0.477612,0.373737,0.327434,0.304348,0.454545,0.336364,0.343434,0.344262


## Accuracy by knowledge area

In [35]:
def compute_acc_by_knowlege_area(
    df_predictions: pd.DataFrame
):
    """
    Accuracy of every model for each knowledge area.

    Returns a DataFrame with one row per `*_prediction` column: a `model`
    column (the column name without the `_prediction` suffix) followed by one
    column per distinct value of `subject`, holding the accuracy against
    `ground_truth` on the rows of that subject.

    Subject columns are ordered by their text so that the table layout is the
    same on every run; iterating a plain set of strings gives an order that
    can change between interpreter sessions.
    """
    # key=str keeps the sort well defined even if a subject is not a string
    subjects = sorted(set(df_predictions['subject']), key=str)

    prediction_columns = [c for c in df_predictions.columns if c.endswith('_prediction')]

    # Slice the frame once per subject and reuse the slices for every model
    rows_by_subject = [df_predictions[df_predictions['subject'] == sub] for sub in subjects]

    list_output = []
    for column in prediction_columns:
        model = column.replace("_prediction","")
        scores = [accuracy(subset['ground_truth'], subset[column]) for subset in rows_by_subject]
        list_output.append([model] + scores)

    return pd.DataFrame(list_output,columns=['model']+subjects)

In [36]:
compute_acc_by_knowlege_area(df_predictions)

Unnamed: 0,model,Matemática e suas tecnologias,Ciências humanas e suas tecnologias,Ciências da natureza e suas tecnologias,"Linguagens, códigos e suas tecnologias"
0,llama_7b,0.179487,0.236686,0.231441,0.222615
1,llama_13b,0.160256,0.174556,0.209607,0.190813
2,alpaca_7b,0.102564,0.153846,0.152838,0.155477
3,alpaca_13b,0.153846,0.393491,0.349345,0.332155
4,koala_7b,0.141026,0.204142,0.170306,0.208481
5,koala_13b,0.128205,0.384615,0.310044,0.378092
6,vicuna_7b,0.224359,0.476331,0.323144,0.374558
7,vicuna_13b,0.192308,0.440828,0.344978,0.378092


## Overall accuracy

In [37]:
def compute_acc_overall(
    df_predictions: pd.DataFrame
):
    """
    Accuracy of every model over all questions.

    Returns a single-row DataFrame with one column per `*_prediction` column
    (named after the model, i.e. without the `_prediction` suffix) holding the
    accuracy of that column against `ground_truth`.

    Only `ground_truth` and the prediction columns are read; the frame does
    not need a `subject` column.
    """
    prediction_columns = [c for c in df_predictions.columns if c.endswith('_prediction')]

    scores = [
        accuracy(df_predictions['ground_truth'], df_predictions[column])
        for column in prediction_columns
    ]

    return pd.DataFrame([scores],columns=[c.replace("_prediction","") for c in prediction_columns])
    
    

In [38]:
compute_acc_overall(df_predictions)

Unnamed: 0,llama_7b,llama_13b,alpaca_7b,alpaca_13b,koala_7b,koala_13b,vicuna_7b,vicuna_13b
0,0.222664,0.184891,0.146123,0.329026,0.187873,0.326044,0.373757,0.362823
