In [1]:
import os
import json
import random
import pickle
import tarfile
import requests
import pandas as pd
from tqdm import tqdm
from ragas import evaluate
from ragas.metrics import faithfulness, answer_relevancy, context_relevancy
from groq import Groq
import time

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
GROQ_API_KEY = Groq(api_key=os.environ.get("GROQ_API_KEY"))

In [6]:

def load_data(sample_fraction=0.01):
    data_directory = 'data'
    articles_file = os.path.join(data_directory, "context_articles.json")
    questions_file = os.path.join(data_directory, "test_questions.json")

    if os.path.exists(articles_file) and os.path.exists(questions_file):
        with open(articles_file, 'r') as f:
            context_articles = json.load(f)
        with open(questions_file, 'r') as f:
            test_set = json.load(f)
        
        # Amostra uma fração dos dados para testes rápidos
        if sample_fraction < 10.0:
            sample_size = max(1, int(len(test_set) * sample_fraction))
            test_set = random.sample(test_set, sample_size)

        return context_articles, test_set
    else:
        print("Files not found, check your dataset path.")
        return None, None

def process_and_evaluate(context_articles, test_set):
    data_directory = 'data'
    data_file = "qa_data.pickle"
    save_file = os.path.join(data_directory, data_file)

    if not os.path.exists(data_directory):
        os.makedirs(data_directory)

    qa = {'question': [], 'ground_truth': [], 'contexts': [], 'answer': [],
          'faithfulness': [], 'answer_relevancy': [], 'context_relevancy': []}

    with tqdm(total=len(test_set)) as pbar:
        for entry in test_set:
            result = {'faithfulness': random.random(), 'answer_relevancy': random.random(), 'context_relevancy': random.random()}
            qa['question'].append(entry['question'])
            qa['ground_truth'].append(entry.get('ground_truth', 'No ground truth provided'))
            qa['contexts'].append(context_articles)  # This might need optimization if too large
            qa['answer'].append(entry['answer'])

            for key in result:
                qa[key].append(result[key])

            pbar.update(1)

    with open(save_file, 'wb') as f:
        pickle.dump(qa, f)

def load_and_display_results():
    data_directory = 'data'
    data_file = "qa_data.pickle"
    save_file = os.path.join(data_directory, data_file)

    if os.path.exists(save_file):
        with open(save_file, 'rb') as f:
            data = pickle.load(f)
        df = pd.DataFrame(data)
        print(df)
    else:
        print(f"File {save_file} not found.")

if __name__ == "__main__":
    context_articles, test_set = load_data(sample_fraction=0.01)  # Adjust fraction as needed for speed
    if context_articles and test_set:
        process_and_evaluate(context_articles, test_set)
        load_and_display_results()


100%|██████████| 1/1 [00:00<00:00, 4760.84it/s]


                                         question              ground_truth  \
0  What stadium do the Chicago White Sox play in?  No ground truth provided   

                                            contexts  \
0  {'san diego padres': 'The San Diego Padres are...   

                                              answer  faithfulness  \
0  {'type': 'span', 'answer_spans': [{'text': ' G...      0.674774   

   answer_relevancy  context_relevancy  
0          0.175383           0.213943  


In [44]:
import os
import json
import pickle
import random
import time
from tqdm import tqdm
import pandas as pd

def load_data():
    data_directory = 'data'
    articles_file = os.path.join(data_directory, "context_articles.json")
    questions_file = os.path.join(data_directory, "test_questions.json")

    # Carregar artigos de contexto e conjunto de teste
    if os.path.exists(articles_file) and os.path.exists(questions_file):
        with open(articles_file, 'r') as f:
            context_articles = json.load(f)
        with open(questions_file, 'r') as f:
            test_set = json.load(f)
        return context_articles, test_set
    else:
        print("Files not found, check your dataset path.")
        return None, None

def process_and_evaluate(context_articles, test_set):
    data_directory = 'data'
    data_file = "qa_data.pickle"
    save_file = os.path.join(data_directory, data_file)

    # Garantindo que o diretório de dados exista
    os.makedirs(data_directory, exist_ok=True)

    # Carregando ou inicializando o dicionário de QA
    if os.path.exists(save_file):
        with open(save_file, 'rb') as f:
            qa = pickle.load(f)
    else:
        qa = {
            'question': [],
            'ground_truth': [],
            'contexts': [],
            'answer': [],
            'faithfulness': [],
            'answer_relevancy': [],
            'context_relevancy': []
        }

    # Assumindo que 'test_set' é uma lista de dicionários
    with tqdm(total=len(test_set)) as pbar:
        for entry in test_set:
            if entry['question'] not in qa['question']:
                try:
                    # Simulação de avaliação
                    result = {'faithfulness': random.random(), 'answer_relevancy': random.random(), 'context_relevancy': random.random()}
                    
                    qa['question'].append(entry['question'])
                    qa['ground_truth'].append(entry['ground_truth'])
                    qa['contexts'].append(context_articles)  # Exemplo simplificado
                    qa['answer'].append(entry['answer'])
                    for key in result:
                        qa[key].append(result[key])

                    if len(qa['question']) % 5 == 0:
                        with open(save_file, 'wb') as f:
                            pickle.dump(qa, f)
                except Exception as e:
                    print(f"Error at index {len(qa['question'])}: {e}")
                    time.sleep(120)
                pbar.update(1)

        # Salvando os dados finais
        with open(save_file, 'wb') as f:
            pickle.dump(qa, f)

def load_and_display_results():
    data_directory = 'data'
    data_file = "qa_data.pickle"
    save_file = os.path.join(data_directory, data_file)

    if os.path.exists(save_file):
        with open(save_file, 'rb') as f:
            data = pickle.load(f)
        df = pd.DataFrame(data)
        print(df)
    else:
        print(f"Arquivo {save_file} não encontrado.")

if __name__ == "__main__":
    context_articles, test_set = load_data()
    if context_articles and test_set:
        process_and_evaluate(context_articles, test_set)
        load_and_display_results()


  0%|          | 0/50 [00:00<?, ?it/s]

Error at index 2: 'ground_truth'


  2%|▏         | 1/50 [02:00<1:38:00, 120.00s/it]

Error at index 3: 'ground_truth'


  4%|▍         | 2/50 [04:00<1:36:00, 120.01s/it]

Error at index 4: 'ground_truth'


  6%|▌         | 3/50 [06:00<1:34:00, 120.01s/it]

Error at index 5: 'ground_truth'


  8%|▊         | 4/50 [08:00<1:32:00, 120.01s/it]

Error at index 6: 'ground_truth'


 10%|█         | 5/50 [10:00<1:30:00, 120.01s/it]

Error at index 7: 'ground_truth'


 12%|█▏        | 6/50 [12:00<1:28:00, 120.01s/it]

Error at index 8: 'ground_truth'


 14%|█▍        | 7/50 [14:00<1:26:00, 120.01s/it]

Error at index 9: 'ground_truth'


 16%|█▌        | 8/50 [16:00<1:24:00, 120.01s/it]

Error at index 10: 'ground_truth'


 18%|█▊        | 9/50 [18:00<1:22:00, 120.01s/it]

Error at index 11: 'ground_truth'


 20%|██        | 10/50 [20:00<1:20:00, 120.01s/it]

Error at index 12: 'ground_truth'


 22%|██▏       | 11/50 [22:00<1:18:00, 120.01s/it]

Error at index 13: 'ground_truth'


 24%|██▍       | 12/50 [24:00<1:16:00, 120.01s/it]

Error at index 14: 'ground_truth'


 26%|██▌       | 13/50 [26:00<1:14:00, 120.01s/it]

Error at index 15: 'ground_truth'


 28%|██▊       | 14/50 [28:00<1:12:00, 120.01s/it]

Error at index 16: 'ground_truth'


 30%|███       | 15/50 [30:00<1:10:00, 120.01s/it]

Error at index 17: 'ground_truth'


 32%|███▏      | 16/50 [32:00<1:08:00, 120.01s/it]

Error at index 18: 'ground_truth'


 34%|███▍      | 17/50 [34:00<1:06:00, 120.01s/it]

Error at index 19: 'ground_truth'


 36%|███▌      | 18/50 [36:00<1:04:00, 120.00s/it]

Error at index 20: 'ground_truth'


 38%|███▊      | 19/50 [38:00<1:02:00, 120.00s/it]

Error at index 21: 'ground_truth'


 40%|████      | 20/50 [40:00<1:00:00, 120.01s/it]

Error at index 22: 'ground_truth'


 42%|████▏     | 21/50 [42:00<58:00, 120.01s/it]  

Error at index 23: 'ground_truth'


 44%|████▍     | 22/50 [44:00<56:00, 120.01s/it]

Error at index 24: 'ground_truth'


 46%|████▌     | 23/50 [46:00<54:00, 120.01s/it]

Error at index 25: 'ground_truth'


 48%|████▊     | 24/50 [48:00<52:00, 120.01s/it]

Error at index 26: 'ground_truth'


 50%|█████     | 25/50 [50:00<50:01, 120.05s/it]

Error at index 27: 'ground_truth'


 52%|█████▏    | 26/50 [52:00<48:00, 120.03s/it]

Error at index 28: 'ground_truth'


 54%|█████▍    | 27/50 [54:00<46:00, 120.03s/it]

Error at index 29: 'ground_truth'


 56%|█████▌    | 28/50 [56:00<44:00, 120.02s/it]

Error at index 30: 'ground_truth'


 58%|█████▊    | 29/50 [58:00<42:00, 120.02s/it]

Error at index 31: 'ground_truth'


 60%|██████    | 30/50 [1:00:00<40:00, 120.01s/it]

Error at index 32: 'ground_truth'


 62%|██████▏   | 31/50 [1:02:00<38:00, 120.01s/it]

Error at index 33: 'ground_truth'


 64%|██████▍   | 32/50 [1:04:00<36:00, 120.01s/it]

Error at index 34: 'ground_truth'


 66%|██████▌   | 33/50 [1:06:00<34:00, 120.01s/it]

Error at index 35: 'ground_truth'


 68%|██████▊   | 34/50 [1:08:00<32:00, 120.01s/it]

Error at index 36: 'ground_truth'


 70%|███████   | 35/50 [1:10:00<30:00, 120.01s/it]

Error at index 37: 'ground_truth'


 72%|███████▏  | 36/50 [1:12:00<28:00, 120.02s/it]

Error at index 38: 'ground_truth'


 74%|███████▍  | 37/50 [1:14:00<26:00, 120.02s/it]

Error at index 39: 'ground_truth'


 76%|███████▌  | 38/50 [1:16:00<24:00, 120.02s/it]

Error at index 40: 'ground_truth'


 78%|███████▊  | 39/50 [1:18:00<22:00, 120.02s/it]

Error at index 41: 'ground_truth'


 80%|████████  | 40/50 [1:20:00<20:00, 120.01s/it]

Error at index 42: 'ground_truth'


 82%|████████▏ | 41/50 [1:22:00<18:00, 120.01s/it]

Error at index 43: 'ground_truth'


 84%|████████▍ | 42/50 [1:24:00<16:00, 120.01s/it]

Error at index 44: 'ground_truth'


 86%|████████▌ | 43/50 [1:26:00<14:00, 120.01s/it]

Error at index 45: 'ground_truth'


 88%|████████▊ | 44/50 [1:28:00<12:00, 120.01s/it]

Error at index 46: 'ground_truth'


 90%|█████████ | 45/50 [1:30:00<10:00, 120.01s/it]

Error at index 47: 'ground_truth'


 92%|█████████▏| 46/50 [1:32:00<08:00, 120.01s/it]

Error at index 48: 'ground_truth'


 94%|█████████▍| 47/50 [1:34:00<06:00, 120.01s/it]

Error at index 49: 'ground_truth'


 96%|█████████▌| 48/50 [1:36:00<04:00, 120.01s/it]

Error at index 50: 'ground_truth'


 98%|█████████▊| 49/50 [1:38:00<02:00, 120.01s/it]

Error at index 51: 'ground_truth'


100%|██████████| 50/50 [1:40:00<00:00, 120.01s/it]


ValueError: All arrays must be of the same length

In [15]:
def setup_dataset():
    DATA_DIR = 'data'
    if not os.path.exists(DATA_DIR):
        os.makedirs(DATA_DIR)

    context_articles = "https://iirc-dataset.s3.us-west-2.amazonaws.com/context_articles.tar.gz"
    filename = 'context_articles.tar.gz'
    filepath = os.path.join(DATA_DIR, filename)

    if not os.path.exists(filepath):
        print(f"Downloading {filename}...")
        response = requests.get(context_articles)
        with open(filepath, 'wb') as f:
            f.write(response.content)
        print(f"{filename} downloaded.")
        
        with tarfile.open(filepath, 'r:gz') as tar:
            tar.extractall(path=DATA_DIR)
        print(f"{filename} extracted.")


In [37]:
import os
import pickle
import random
import time
from tqdm import tqdm

def process_and_evaluate():
    # Definindo o diretório e o arquivo de dados
    data_directory = 'data'
    data_file = "qa_data.pickle"
    save_file = os.path.join(data_directory, data_file)

    # Garantindo que o diretório de dados exista
    os.makedirs(data_directory, exist_ok=True)

    # Carregando ou inicializando o dicionário de QA
    if os.path.exists(save_file):
        with open(save_file, 'rb') as f:
            qa = pickle.load(f)
    else:
        qa = {
            'question': [],
            'ground_truth': [],
            'contexts': [],
            'answer': [],
            'faithfulness': [],
            'answer_relevancy': [],
            'context_relevancy': []
        }

    # Simulando dados de entrada para processamento
    dataset = [
        {'question': "What is AI?", 'ground_truth': "Study of intelligent agents.", 'contexts': "AI is used in various fields.", 'answer': "AI stands for Artificial Intelligence."}
    ]

    with tqdm(total=len(dataset)) as pbar:
        for entry in dataset:
            if entry['question'] not in qa['question']:
                try:
                    # Simulação de avaliação
                    result = {'faithfulness': random.random(), 'answer_relevancy': random.random(), 'context_relevancy': random.random()}
                    
                    qa['question'].append(entry['question'])
                    qa['ground_truth'].append(entry['ground_truth'])
                    qa['contexts'].append(entry['contexts'])
                    qa['answer'].append(entry['answer'])
                    for key in result:
                        qa[key].append(result[key])

                    if len(qa['question']) % 5 == 0:
                        with open(save_file, 'wb') as f:
                            pickle.dump(qa, f)
                except Exception as e:
                    print(f"Error at index {len(qa['question'])}: {e}")
                    time.sleep(120)
                pbar.update(1)

        with open(save_file, 'wb') as f:
            pickle.dump(qa, f)


In [38]:
def load_and_display_results():
    data_directory = 'data'
    data_file = "qa_data.pickle"
    save_file = os.path.join(data_directory, data_file)

    if os.path.exists(save_file):
        with open(save_file, 'rb') as f:
            data = pickle.load(f)
        df = pd.DataFrame(data)
        print(df)
    else:
        print(f"Arquivo {save_file} não encontrado.")

In [39]:
if __name__ == "__main__":
    setup_dataset()
    process_and_evaluate()
    # Carregar e exibir os resultados
    with open('qa_data.pickle', 'rb') as f:
        data = pickle.load(f)
    df = pd.DataFrame(data)
    print(df.describe())
    df.hist(figsize=(10,8), layout=(1,3))


  0%|          | 0/1 [00:00<?, ?it/s]


FileNotFoundError: [Errno 2] No such file or directory: 'qa_data.pickle'

In [40]:
#Adaptado do Ramon Simoes
save_file = "qa_data.pickle"

if os.path.exists(save_file):
    with open(save_file, 'rb') as f:
        qa = pickle.load(f)
else:
    qa = {'question': [], 'ground_truth': [], 'contexts': [], 'answer': [],
          'faithfulness': [], 'answer_relevancy': [], 'context_relevancy': []
         }

last_processed_index = 0
with tqdm(total=len(dataset)) as pbar:
    while last_processed_index < len(dataset):

        q = Dataset.from_dict(dataset[last_processed_index : last_processed_index + 1 ])

        if q['question'] in qa['question']:
            last_processed_index += 1
            pbar.update(1)
            continue

        try:
            qa['question'].append(q['question'])
            qa['ground_truth'].append(q['ground_truth'])
            qa['contexts'].append(q['contexts'])
            qa['answer'].append(q['answer'])

            result = evaluate(q, metrics, llm=llm, embeddings=embedder)

            for r in result:
                qa[r].append(result[r])

            if len(qa['question']) % 5 == 0:
                with open(save_file, 'wb') as f:
                    pickle.dump(qa, f)

                sleep_time = random.uniform(5, 20)
                time.sleep(sleep_time)

            last_processed_index += 1
            pbar.update(1)

        except Exception as e:
            print(f"\nErro no indice {last_processed_index}: {e}")
            print("Rate limit excedido... esperando 120s")
            time.sleep(120)

with open(save_file, 'wb') as f:
    pickle.dump(qa, f)

NameError: name 'dataset' is not defined